In [1]:
from __future__ import division

import argparse
import os
import matplotlib
matplotlib.use('TkAgg')
import matplotlib.animation as animation
import matplotlib.pyplot as plt

from catch_ball import CatchBall
from dqn_agent import DQNAgent

def init():
    """Draw the first frame of the animation.

    Passed to FuncAnimation as ``init_func``. Pushes the current module-level
    observation ``state_t_1`` into the module-level image artist ``img`` and
    turns the axes off.

    Returns:
        A one-element tuple holding the image artist, i.e. the artists that
        were (re)drawn.
    """
    first_frame = state_t_1
    img.set_array(first_frame)
    plt.axis("off")
    return (img,)


def animate(step):
    """Advance the demo by one animation frame.

    Used as the per-frame callback of FuncAnimation. Works entirely on
    module-level state: ``env``, ``agent``, ``img`` and the globals rebound
    below.

    * If the previous observation was terminal, the environment is reset, the
      win/lose counters are updated from the last observed reward, and the
      running score is printed.
    * Otherwise the agent picks an action for the current state and the
      environment executes it.

    In both cases the environment is then observed again and the image artist
    is refreshed with the new state.

    Args:
        step: Frame value supplied by FuncAnimation; not used.

    Returns:
        A one-element tuple holding the updated image artist.
    """
    global win, lose
    global state_t_1, reward_t, terminal

    if terminal:
        env.reset()

        # for log: reward_t still holds the reward from the observation that
        # flagged the episode as terminal
        if reward_t == 1:
            win += 1
        elif reward_t == -1:
            lose += 1

        # An episode that ends with any other reward leaves both counters
        # untouched; guard the ratio so the very first such episode does not
        # raise ZeroDivisionError inside the animation callback.
        total = win + lose
        win_rate = 100 * win / total if total else 0.0
        print("WIN: {:03d}/{:03d} ({:.1f}%)".format(win, total, win_rate))

    else:
        state_t = state_t_1

        # execute action in environment
        # second argument is 0.0 -- presumably the exploration rate, i.e. a
        # purely greedy policy; confirm against DQNAgent.select_action
        action_t = agent.select_action(state_t, 0.0)
        env.execute_action(action_t)

    # observe environment
    state_t_1, reward_t, terminal = env.observe()

    # animate
    img.set_array(state_t_1)
    plt.axis("off")
    return img,

In [2]:
# args
# Options mirror a command-line interface, but an explicit argv list is parsed
# below instead of the process arguments.
parser = argparse.ArgumentParser()
parser.add_argument("-m", "--model_path")
# -s/--save is a boolean flag that is off unless given
parser.add_argument("-s", "--save", action="store_true", default=False)
# Parse a fixed argv: empty model path, save flag left at its default.
args = parser.parse_args(["-m", ""])

In [None]:
# environment, agent
env = CatchBall()
agent = DQNAgent(env.enable_actions, env.name)
agent.load_model(args.model_path)

# variables
# Module-level state shared with the init()/animate() callbacks.
win, lose = 0, 0
state_t_1, reward_t, terminal = env.observe()

# animate
# figsize is (width, height) in inches, so the column count drives the width
# and the row count drives the height.
fig = plt.figure(figsize=(env.screen_n_cols / 2, env.screen_n_rows / 2))
# FigureCanvas.set_window_title was deprecated in Matplotlib 3.4 and later
# removed; the manager-level method is available on old and new versions.
if fig.canvas.manager is not None:
    fig.canvas.manager.set_window_title("{}-{}".format(env.name, agent.name))
img = plt.imshow(state_t_1, interpolation="none", cmap="gray")
# Keep a reference to the animation object; it is also needed for saving below.
ani = animation.FuncAnimation(fig, animate, init_func=init, interval=(1000 / env.frame_rate), blit=True)

if args.save:
    # save animation (requires ImageMagick)
    # __file__ is not defined when this runs as a notebook cell, so fall back
    # to the current working directory in that case.
    try:
        base_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        base_dir = os.getcwd()
    out_dir = os.path.join(base_dir, "tmp")
    # Make sure the output directory exists before the writer opens the file.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)
    ani_path = os.path.join(out_dir, "demo-{}.gif".format(env.name))
    ani.save(ani_path, writer="imagemagick", fps=env.frame_rate)
else:
    # show animation
    plt.show()

WIN: 001/001 (100.0%)
WIN: 002/002 (100.0%)
WIN: 003/003 (100.0%)
WIN: 004/004 (100.0%)
WIN: 005/005 (100.0%)
WIN: 006/006 (100.0%)
WIN: 007/007 (100.0%)
WIN: 008/008 (100.0%)
WIN: 009/009 (100.0%)
WIN: 010/010 (100.0%)
WIN: 011/011 (100.0%)
WIN: 012/012 (100.0%)
WIN: 013/013 (100.0%)
WIN: 014/014 (100.0%)
WIN: 015/015 (100.0%)
WIN: 016/016 (100.0%)
WIN: 017/017 (100.0%)
WIN: 018/018 (100.0%)
WIN: 019/019 (100.0%)
WIN: 020/020 (100.0%)
WIN: 021/021 (100.0%)
WIN: 022/022 (100.0%)
WIN: 023/023 (100.0%)
WIN: 024/024 (100.0%)
WIN: 025/025 (100.0%)
WIN: 026/026 (100.0%)
WIN: 027/027 (100.0%)
WIN: 028/028 (100.0%)
WIN: 029/029 (100.0%)
WIN: 030/030 (100.0%)
WIN: 031/031 (100.0%)
WIN: 032/032 (100.0%)
WIN: 033/033 (100.0%)
WIN: 034/034 (100.0%)
WIN: 035/035 (100.0%)
WIN: 036/036 (100.0%)
WIN: 037/037 (100.0%)
WIN: 038/038 (100.0%)
WIN: 039/039 (100.0%)
WIN: 040/040 (100.0%)
WIN: 041/041 (100.0%)
WIN: 042/042 (100.0%)
WIN: 043/043 (100.0%)
WIN: 044/044 (100.0%)
WIN: 045/045 (100.0%)
WIN: 046/0