In [1]:
#!/usr/bin/env python
from __future__ import print_function

import skimage as skimage
from skimage import transform, color, exposure
from skimage.viewer import ImageViewer
import random
import sys
import os
from random import choice
import numpy as np
from collections import deque
import time
import tensorflow as tf
import json
from tensorflow.keras import models

from tensorflow.keras import Model
from tensorflow.keras import layers
from tensorflow.keras import optimizers
from keras import backend as K
import vizdoom as vzd
from vizdoom import DoomGame, ScreenResolution
from vizdoom import *
import itertools as it
from time import sleep
from time import time


#from networks import Networks

  from .core import *


In [2]:
if len(tf.config.experimental.list_physical_devices('GPU')) > 1:
    print("GPU available")
    DEVICE = "/gpu:0"
else:
    print("No GPU available")
    DEVICE = "/cpu:0"

No GPU available


In [3]:
def preprocessImg(img, size):

    img = np.rollaxis(img, 0, 2)    # It becomes (640, 480, 3) #2
    img = skimage.transform.resize(img, size)
    img = skimage.color.rgb2gray(img) 

    return img

In [4]:

teststart = time()

In [5]:
#@tf.function
def policy_reinforce(input_shape, action_size, learning_rate):
    """
    Model for REINFORCE
    """

    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Conv2D(32, 8, strides=(4,4), input_shape=(input_shape)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.ReLU())
    model.add(tf.keras.layers.Conv2D(64, 4, strides=(2,2)))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.ReLU())
    model.add(tf.keras.layers.Conv2D(64, 3, 3, padding="same"))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.ReLU())
    
    model.add(tf.keras.layers.Flatten())
    
    model.add(tf.keras.layers.Dense(64))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.ReLU())
    model.add(tf.keras.layers.Dense(32))
    model.add(tf.keras.layers.BatchNormalization())
    model.add(tf.keras.layers.ReLU())
    model.add(tf.keras.layers.Dense(action_size, activation='softmax'))

    adam = tf.keras.optimizers.Adam(lr=learning_rate)
    model.compile(loss='categorical_crossentropy',optimizer=adam)
    model.summary()
    return model

In [6]:
#with tf.device(DEVICE):
class REINFORCEAgent:
    
    def __init__(self, state_size, action_size):
        # get size of state and action
        self.state_size = state_size
        self.action_size = action_size
        self.observe = 0
        self.frame_per_action = 4 # Frame skipping

        # These are hyper parameters for the Policy Gradient
        self.discount_factor = 0.99
        self.learning_rate = 0.0001

        # Model for policy network
        self.model = True

        # Store episode states, actions and rewards
        self.states, self.actions, self.rewards = [], [], [] #, [], [], [], [], [], [], [], [], []

        # Performance Statistics
        self.stats_window_size= 50 # window size for computing rolling statistics
        self.mavg_score = [] # Moving Average of Survival Time
        self.var_score = [] # Variance of Survival Time
        self.mavg_pistol_left = [] # Moving Average of Ammo used
        self.mavg_kill_counts = [] # Moving Average of Kill Counts
        self.mavg_shotgun_left = []
        self.mavg_minigun_left = []
        self.mavg_plasma_left = []
        self.mavg_rocket_left = []
        self.mavg_secret_left = []
        self.mavg_hit_count = []
        self.mavg_damage_given = []
        self.mavg_item_collected = []
        self.mavg_armor = []

    # Use the output of policy network, pick action stochastically (Stochastic Policy)
    
    def get_action(self, state):
        policy = self.model.predict(state).flatten()
        return np.random.choice(self.action_size, 1, p=policy)[0], policy

    # Instead agent uses sample returns for evaluating policy
    # Use TD(1) i.e. Monte Carlo updates 
    
    def discount_rewards(self, rewards):
        discounted_rewards = np.zeros_like(rewards)
        running_add = 0
        for t in reversed(range(0, len(rewards))):
            if rewards[t] != 0:
                running_add = 0
            running_add = running_add * self.discount_factor + rewards[t]
            discounted_rewards[t] = running_add
        return discounted_rewards

    # save <s, a ,r> of each step
    
    def append_sample(self, state, action, reward):
        self.states.append(state)
        self.rewards.append(reward)
        self.actions.append(action)

    # update policy network every episode
    
    def train_model(self):
        episode_length = len(self.states)

        discounted_rewards = self.discount_rewards(self.rewards)
        # Standardized discounted rewards
        discounted_rewards -= np.mean(discounted_rewards) 
        if np.std(discounted_rewards):
            discounted_rewards /= np.std(discounted_rewards)
        else:
            self.states, self.actions, self.rewards = [], [], [] #, [], [], [], [], [], [], [], [], []
            print ('std = 0!')
            return 0

        update_inputs = np.zeros(((episode_length,) + self.state_size)) # Episode_lengthx64x64x4
        # Similar to one-hot target but the "1" is replaced by discounted_rewards R_t
        advantages = np.zeros((episode_length, self.action_size))

        # Episode length is like the minibatch size in DQN
        for i in range(episode_length):
            update_inputs[i,:,:,:] = self.states[i]#[i,:,:,:] [i,:,:,:,:,:,:,:,:,:,:,:,:]
            advantages[i][self.actions[i]] = discounted_rewards[i]
        
        callbacks = [tf.keras.callbacks.TensorBoard(log_dir="/home/spillingvoid/Downloads/programs/Doom/statistics/", 
                                                    histogram_freq=1, write_graph=True),
                tf.keras.callbacks.ModelCheckpoint(filepath="/home/spillingvoid/Downloads/programs/Doom/models/testrfdoom.h5",
                save_weights_only=True),
                    ]
        loss = self.model.fit(update_inputs, advantages, epochs=1, callbacks=callbacks, verbose=2)
        self.states, self.actions, self.rewards = [], [], [] #, [], [], [], [], [], [], [], [], []

        return loss.history['loss']

    
    def shape_reward(self, r_t, misc, prev_misc, t):
        
        # Check any kill count
        if (misc[0] > prev_misc[0]):
            r_t = r_t + 310

        if (misc[1] < prev_misc[1]): # Use pistol
            r_t = r_t - 1
        if (misc[1] > prev_misc[1]):
            r_t = r_t + 1
        
        if (misc[2] < prev_misc[2]): # Loss HEALTH
            r_t = r_t - 2
        if (misc[2] > prev_misc[2]):
            r_t = r_t + 2.25
        
        if (misc[3] < prev_misc[3]): # Loss shotgun
            r_t = r_t - 3
        if (misc[3] < prev_misc[3]):
            r_t = r_t +3
        
        if (misc[4] < prev_misc[4]): # Loss minigun
            r_t = r_t - 1
        if (misc[4] < prev_misc[4]):
            r_t = r_t + 1
        
        if (misc[5] < prev_misc[5]): # plasma
            r_t = r_t - 2
        if (misc[5] < prev_misc[5]):
            r_t = r_t + 2
            
        if (misc[6] < prev_misc[6]): # rocket
            r_t = r_t - 5
        if (misc[6] > prev_misc[6]):
            r_t = r_t + 5
            
        if (misc[7] > prev_misc[7]): # secrets
            r_t = r_t + 15 
                    
        if (misc[8] > prev_misc[8]): # hitcount
            r_t = r_t + 5
            
        if (misc[9] > prev_misc[9]): # hits taken
            r_t = r_t - 5
        
        if (misc[10] > prev_misc[10]): # items picked up
            r_t = r_t + 2
        
        if (misc[11] < prev_misc[11]): # armor
            r_t = r_t - 1
        if (misc[11] > prev_misc[11]):
            r_t = r_t + 1.5
        
        print("Shaping Reward", r_t)
            
        return r_t

In [7]:
if __name__ == "__main__":

    # Avoid Tensorflow eats up GPU memory
    #config = tf.ConfigProto()
    #config.gpu_options.allow_growth = True
    #sess = tf.Session(config=config)
    #K.set_session(sess)
    
    with tf.device(DEVICE):
        game = vzd.DoomGame()
        game.load_config("/home/spillingvoid/Downloads/programs/Doom/scenarios/Doom32.cfg")
        game.add_available_game_variable(vzd.GameVariable.KILLCOUNT)
        game.add_available_game_variable(vzd.GameVariable.AMMO2)
        game.add_available_game_variable(vzd.GameVariable.HEALTH)
        game.add_available_game_variable(vzd.GameVariable.AMMO3)
        game.add_available_game_variable(vzd.GameVariable.AMMO4)
        game.add_available_game_variable(vzd.GameVariable.AMMO5)
        game.add_available_game_variable(vzd.GameVariable.AMMO6)
        game.add_available_game_variable(vzd.GameVariable.SECRETCOUNT)
        game.add_available_game_variable(vzd.GameVariable.HITCOUNT)
        game.add_available_game_variable(vzd.GameVariable.HITS_TAKEN)
        game.add_available_game_variable(vzd.GameVariable.ITEMCOUNT)
        game.add_available_game_variable(vzd.GameVariable.ARMOR)

        
        game.set_window_visible(True)
        game.set_mode(vzd.Mode.PLAYER)
        game.set_screen_format(vzd.ScreenFormat.GRAY8)
        game.set_screen_resolution(vzd.ScreenResolution.RES_640X480)
        game.get_available_buttons_size()
        game.init()

    # Maximum number of episodes
        max_episodes = 8 #128
    
        game.new_episode()
        game_state = game.get_state()
        misc = game_state.game_variables  # [KILLCOUNT, AMMO, HEALTH]
        print("misc", misc)
        prev_misc = misc

        action_size = game.get_available_buttons_size()

        img_rows , img_cols = 30, 45
    # Convert image into Black and white
        img_channels = 4 # We stack 4 frames
        #model = tf.keras.models.load_model("/home/spillingvoid/Downloads/programs/Doom/models/reinforce.h5")
        
        state_size = (img_rows, img_cols, img_channels)
        agent = REINFORCEAgent(state_size, action_size)

        agent.model = policy_reinforce(state_size, action_size, agent.learning_rate)

    # Start training
        GAME = 0
        t = 0
        max_life = 0 # Maximum episode life (Proxy for agent performance)

    # Buffer to compute rolling statistics 
        life_buffer, pistol_buffer, kills_buffer, shotgun_buffer, minigun_buffer, plasma_buffer, rocket_buffer, secret_buffer, hit_buffer, damtaken_buffer, item_buffer, armor_buffer = [], [], [], [], [], [], [], [], [], [], [], []

        
        for i in range(max_episodes):

            game.new_episode()
            game_state = game.get_state()
            misc = game_state.game_variables 
            prev_misc = misc

            x_t = game_state.screen_buffer # 480 x 640
            x_t = preprocessImg(x_t, size=(img_rows, img_cols))
            s_t = np.stack(([x_t]*4), axis=2) # It becomes 64x64x4 axis=2
            s_t = np.expand_dims(s_t, axis=0) # 1x64x64x4

            life = 0 # Episode life

            while not game.is_episode_finished():

                loss = 0 # Training Loss at each update
                r_t = 0 # Initialize reward at time t
                a_t = np.zeros([action_size]) # Initialize action at time t

                x_t = game_state.screen_buffer
                x_t = preprocessImg(x_t, size=(img_rows, img_cols))
                x_t = np.reshape(x_t, (1, img_rows, img_cols, 1))
                s_t = np.append(x_t, s_t[:, :, :, :3], axis=3) #x_t, s_t[:, :, :, :3], #axis = 3 
                
            # Sample action from stochastic softmax policy
                action_idx, policy  = agent.get_action(s_t)
                a_t[action_idx] = 1 

                a_t = a_t.astype(int)
                game.set_action(a_t.tolist())
                skiprate = agent.frame_per_action # Frame Skipping = 4
                game.advance_action(skiprate)

                r_t = game.get_last_reward()  # Each frame we get reward of 0.1, so 4 frames will be 0.4
            # Check if episode is terminated
                is_terminated = game.is_episode_finished()

                if (is_terminated):
                # Save max_life
                    if (life > max_life):
                        max_life = life 
                    life_buffer.append(life)
                    pistol_buffer.append(misc[1])
                    kills_buffer.append(misc[2])
                    shotgun_buffer.append(misc[3])
                    minigun_buffer.append(misc[4])
                    plasma_buffer.append(misc[5])
                    rocket_buffer.append(misc[6])
                    secret_buffer.append(misc[7])
                    hit_buffer.append(misc[8])
                    damtaken_buffer.append(misc[9])
                    item_buffer.append(misc[10]) 
                    armor_buffer.append(misc[11])
                    print ("Episode Finish ", prev_misc, policy)
                else:
                    life += 1
                    game_state = game.get_state()  # Observe again after we take the action
                    misc = game_state.game_variables

            # Reward Shaping
                r_t = agent.shape_reward(r_t, misc, prev_misc, t)

            # Save trajactory sample <s, a, r> to the memory
                agent.append_sample(s_t, action_idx, r_t)

            # Update the cache
                t += 1
                prev_misc = misc

                if (is_terminated and t > agent.observe):
                # Every episode, agent learns from sample returns
                    loss = agent.train_model()

            # Save model every 10000 iterations
                if t % 100 == 0: #10000
                    print("Save model")
                    agent.model.save("/home/spillingvoid/Downloads/programs/Doom/models/testrfdoom.h5", overwrite=True)
                    agent.model.save_weights("/home/spillingvoid/Downloads/programs/Doom/models/testrfdoom.h5", overwrite=True)

                state = ""
                if t <= agent.observe:
                    state = "Observe mode"
                else:
                    state = "Train mode"

                if (is_terminated):

                    # Print performance statistics at every episode end
                    print("TIME", t, "/ GAME", GAME, "/ STATE", state, "/ ACTION", action_idx, "/ REWARD", r_t, "/ LIFE", max_life, "/ LOSS", loss)

                # Save Agent's Performance Statistics
                    if GAME % agent.stats_window_size == 0 and t > agent.observe: 
                        print("Update Rolling Statistics")
                        agent.mavg_score.append(np.mean(np.array(life_buffer)))
                        agent.var_score.append(np.var(np.array(life_buffer)))
                        agent.mavg_pistol_left.append(np.mean(np.array(pistol_buffer)))
                        agent.mavg_kill_counts.append(np.mean(np.array(kills_buffer)))
                        agent.mavg_shotgun_left.append(np.mean(np.array(shotgun_buffer)))
                        agent.mavg_minigun_left.append(np.mean(np.array(minigun_buffer)))
                        agent.mavg_plasma_left.append(np.mean(np.array(plasma_buffer)))
                        agent.mavg_rocket_left.append(np.mean(np.array(rocket_buffer)))
                        agent.mavg_secret_left.append(np.mean(np.array(secret_buffer)))
                        agent.mavg_hit_count.append(np.mean(np.array(hit_buffer)))
                        agent.mavg_damage_given.append(np.mean(np.array(damtaken_buffer)))
                        agent.mavg_item_collected.append(np.mean(np.array(item_buffer)))
                        agent.mavg_armor.append(np.mean(np.array(armor_buffer)))
                                       
                    # Reset rolling stats buffer
                        life_buffer, pistol_buffer, kills_buffer, shotgun_buffer, minigun_buffer, plasma_buffer, rocket_buffer, secret_buffer, hit_buffer, damtaken_buffer, item_buffer, armor_buffer = [], [], [], [], [], [], [], [], [], [], [], []

                    # Write Rolling Statistics to file
                        with open("/home/spillingvoid/Downloads/programs/Doom/statistics/reinforce_stats.txt", "a+") as stats_file:
                            stats_file.write('Game: ' + str(GAME) + '\n')
                            stats_file.write('Max Score: ' + str(max_life) + '\n')
                            stats_file.write('mavg_score: ' + str(agent.mavg_score) + '\n')
                            stats_file.write('var_score: ' + str(agent.var_score) + '\n')
                            stats_file.write('mavg_pistol_left: ' + str(agent.mavg_pistol_left) + '\n')
                            stats_file.write('mavg_kill_counts: ' + str(agent.mavg_kill_counts) + '\n')
                            stats_file.write('mavg_shotgun_left: ' + str(agent.mavg_shotgun_left) + '\n')
                            stats_file.write('mavg_minigun_left: ' + str(agent.mavg_minigun_left) + '\n')
                            stats_file.write('mavg_plasma_left: ' + str(agent.mavg_plasma_left) + '\n')
                            stats_file.write('mavg_rocket_left: ' + str(agent.mavg_rocket_left) + '\n')
                            stats_file.write('mavg_secret_left: ' + str(agent.mavg_secret_left) + '\n')
                            stats_file.write('mavg_hit_count: ' + str(agent.mavg_hit_count) + '\n')
                            stats_file.write('mavg_damage_given: ' + str(agent.mavg_damage_given) + '\n')
                            stats_file.write('mavg_item_collected: ' + str(agent.mavg_item_collected) + '\n')
                            stats_file.write('mavg_armor: ' + str(agent.mavg_armor) + '\n')
        
        # Episode Finish. Increment game count
            GAME += 1

misc [  0.   0.   0.   0.   0. 100.   0.  50.   0.  50.   0.   0.   0.]
Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d (Conv2D)              (None, 6, 10, 32)         8224      
_________________________________________________________________
batch_normalization (BatchNo (None, 6, 10, 32)         128       
_________________________________________________________________
re_lu (ReLU)                 (None, 6, 10, 32)         0         
_________________________________________________________________
conv2d_1 (Conv2D)            (None, 2, 4, 64)          32832     
_________________________________________________________________
batch_normalization_1 (Batch (None, 2, 4, 64)          256       
_________________________________________________________________
re_lu_1 (ReLU)               (None, 2, 4, 64)          0         
__________________________________________________

  img = skimage.color.rgb2gray(img)


Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Re

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Save model
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Re

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Save model
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Save model
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Re

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Save model
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Save model
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Re

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Save model
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Re

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Re

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Save model
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward 0.996
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Rew

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Re

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Save model
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Re

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Re

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Save model
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Re

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Episode Finish  [  0.   0.   0.   0.   0. 100.   0.  31.   0.  31.   0.   0.   0.] [0.06589188 0.03869712 0.06345648 0.06098068 0.05233157 0.05205997
 0.05511546 0.04414927 0.05147725 0.

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Save model
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004

Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Reward -0.004
Shaping Re

33/33 - 1s - loss: 0.0160
Save model
TIME 8400 / GAME 7 / STATE Train mode / ACTION 13 / REWARD -0.004 / LIFE 1049 / LOSS [0.016033630818128586]


In [8]:
print("Training complete")
endtime = time()
print(" Test Time elapsed: %.2f minutes" % ((endtime - teststart) / 60.0))

Training complete
 Test Time elapsed: 32.10 minutes
