In [1]:
# ---- Model settings ----
# Location of the saved model that will be evaluated.
model_path = "DL_comp4_13_model"

# Switch to True when the model returns several values; the evaluation loop
# then takes the first returned value as the action probabilities.
multiple_return_values = False

# ---- Runtime settings ----
# Seed handed to the game environment (the public seed is 2021).
seed = 2021

# Index of the physical GPU that should be made visible to TensorFlow.
gpu_number = 0

# When True, per-frame and per-episode details are printed during evaluation.
verbose = False

In [2]:
import copy
import numpy as np
import tensorflow as tf

# Expose only the GPU selected by `gpu_number` to TensorFlow and enable
# memory growth; nothing is configured when no physical GPU is found.
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        selected_gpu = gpus[gpu_number]
        tf.config.experimental.set_visible_devices(selected_gpu, 'GPU')
        # Memory growth has to be configured identically on every GPU,
        # so it is enabled on all physical devices, not just the visible one.
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        logical_gpus = tf.config.experimental.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPUs")
    except RuntimeError as err:
        # Raised when the GPUs were already initialized before this cell ran;
        # the message is reported and execution continues.
        print(err)

3 Physical GPUs, 1 Logical GPUs


In [3]:
# Load the saved model from `model_path`. compile=False is passed, so the model
# is restored without being compiled; it is only called for inference.
model = tf.keras.models.load_model(model_path, compile=False)

In [4]:
import os
# Select SDL's "dummy" video driver so that no game window pops up.
# NOTE(review): this is set before the ple imports below, presumably so that
# pygame sees it when it is loaded — keep this ordering.
os.environ["SDL_VIDEODRIVER"] = "dummy"  # this line disables the pop-out window
from ple.games.flappybird import FlappyBird
from ple import PLE

# Create the Flappy Bird game and wrap it in the PLE interface, seeded with `seed`.
game = FlappyBird()
env = PLE(game, fps=30, display_screen=False, rng=seed)  # game environment interface
# Reset the game once before it is used.
env.reset_game()

pygame 1.9.6
Hello from the pygame community. https://www.pygame.org/contribute.html
couldn't import doomish
Couldn't import doom


In [5]:
def TA_state():
    """Build the model input from the current game state.

    A deep copy of ``game.getGameState()`` is taken, the four pipe
    y-coordinates are made relative to the player by subtracting
    ``player_y``, and the dictionary values (in dictionary order) are
    packed into a float32 tensor.

    Returns:
        A ``tf.float32`` tensor with a leading batch dimension of size 1.
    """
    snapshot = copy.deepcopy(game.getGameState())

    # Express every pipe edge relative to the player's vertical position.
    pipe_keys = (
        'next_next_pipe_bottom_y',
        'next_next_pipe_top_y',
        'next_pipe_bottom_y',
        'next_pipe_top_y',
    )
    for key in pipe_keys:
        snapshot[key] -= snapshot['player_y']

    # Convert to a tensor and prepend the batch axis expected by the model call.
    features = tf.convert_to_tensor(list(snapshot.values()), dtype=tf.float32)
    return tf.expand_dims(features, axis=0)

In [6]:
# Play 100 episodes with the loaded model acting greedily (argmax over its
# action probabilities) and report the mean alive time and mean episode reward.
alive_times = []
episode_rewards = []

# The action set is fetched once up front instead of on every frame;
# it is only used as a lookup table for the chosen action index.
action_set = env.getActionSet()

for test_num in range(1, 101):
    # The frame counter starts at 1; kept unchanged so that the reported
    # numbers stay comparable with earlier runs.
    alive_time = 1
    episode_reward = 0
    env.reset_game()

    while not env.game_over():
        state = TA_state()

        # Your model should return action probabilities
        # In other words, the last layer of your model should be Softmax
        model_output = model(state)
        # With multiple return values, the first one holds the action probabilities.
        action_prob = model_output[0] if multiple_return_values else model_output

        if verbose:
            print(f"test num: {test_num}, frame: {alive_time}, action probs: {action_prob}")

        # Pick the most probable action of the single batch entry and convert
        # the scalar tensor to a plain int before indexing the Python list.
        action_idx = int(tf.argmax(action_prob, axis=1)[0])

        reward = env.act(action_set[action_idx])

        alive_time += 1
        episode_reward += reward

    alive_times.append(alive_time)
    episode_rewards.append(episode_reward)

    if verbose:
        print(f"[{test_num}] alive: {alive_time}, episode reward: {episode_reward}")

# np.mean accepts the Python lists directly; the printed text is unchanged.
print(
    f"average alive time: {np.mean(alive_times)},\n"
    f"average episode reward: {np.mean(episode_rewards)}\n"
    "show your result https://docs.google.com/spreadsheets/d/1QHNmes31XdUSsG2K9U7cgTggeGfiMgADvrJJsETjbxM"
)

average alive time: 916.17,
average episode reward: 17.89
show your result https://docs.google.com/spreadsheets/d/1QHNmes31XdUSsG2K9U7cgTggeGfiMgADvrJJsETjbxM
