In [1]:
# Playing Atari games with a trained DQN agent using the PTAN library

In [2]:
import warnings
warnings.filterwarnings('ignore',category=FutureWarning) 
# suppress numpy future warnings 
# warning created due to issues with tensorflow 1.14
import collections

In [3]:
import gym
import ptan
import argparse
import time
import torch
from lib import dqn_model, common

In [4]:
import os
import random
import numpy as np

# Single seed shared by every random number generator configured below.
seed = 2390857

# NOTE(review): Python reads PYTHONHASHSEED at interpreter start-up, so this
# assignment presumably only affects processes spawned from the kernel --
# confirm whether that is the intent.
os.environ['PYTHONHASHSEED'] = str(seed)

# Ask cuDNN for deterministic kernels and switch off its auto-tuner.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Seed the Python, PyTorch (CPU and CUDA) and NumPy generators, in that order.
for _seed_rng in (random.seed, torch.manual_seed, torch.cuda.manual_seed, np.random.seed):
    _seed_rng(seed)
del _seed_rng

In [5]:
# Hyperparameter set for the game played in this notebook.
GAME_KEY = 'breakout'
params = common.HYPERPARAMS[GAME_KEY]

# Parameter sets are specified in the ./lib/common.py file. For reference, the
# 'pong' entry is reproduced below; the 'breakout' entry selected above
# presumably follows the same schema -- confirm in lib/common.py.

# HYPERPARAMS = {
#     'pong': {
#         'env_name':         "PongNoFrameskip-v4",
#         'stop_reward':      18.0,
#         'run_name':         'pong',
#         'replay_size':      100000,
#         'replay_initial':   10000,
#         'target_net_sync':  1000,
#         'epsilon_frames':   10**5,
#         'epsilon_start':    1.0,
#         'epsilon_final':    0.02,
#         'learning_rate':    0.0001,
#         'gamma':            0.99,
#         'batch_size':       32
#     },
#     ...
# }


In [6]:
# Manual switch: set to False to force CPU execution even when CUDA is present.
USE_GPU = True

# CUDA is used only when it is both requested and actually available.
USE_CUDA = USE_GPU and torch.cuda.is_available()

if USE_CUDA:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

In [9]:
# Greedy playback of a trained agent: one rendered episode per evaluation seed.
# seed = 189762
# [324267, 250271, 189762, 542357, 519293, 516463, 353568, 110577, 953419, 405991]
EVAL_SEEDS = [324267, 250271, 189762, 542357, 519293, 516463, 353568, 110577, 953419, 405991]

# Checkpoint that is played back; the same file is used for every seed.
MODEL_FILENAME = './models/breakout-3step-double-dueling-srg_0.0001-324267.pt'

# Other checkpoints used previously:
# MODEL_FILENAME = './models/pong-1step-double-dueling-324267.pt'
# MODEL_FILENAME = './models/pong/pong_basic/pong-basic_'+str(seed)+'.pt'
# MODEL_FILENAME = './models/pong/pong-nsteps_3/pong-nsteps_3_110577.pt'
# MODEL_FILENAME = './models/pong/pong-nsteps_4/pong-nsteps_4_110577.pt'

VISUALIZE = True      # render every frame while playing
FPS = 200             # upper bound on rendered frames per second
PLAYS_PER_SEED = 1    # episodes played for each evaluation seed


def _play_episode(env, net, device, visualize=True, fps=200):
    """Play one episode, always taking the action with the highest network output.

    Args:
        env: environment exposing ``reset()``, ``render()`` and
            ``step(action) -> (state, reward, done, info)``.
        net: callable mapping a batched state tensor to per-action values;
            only the first row of its output is used.
        device: torch device the state tensor is moved to before calling ``net``.
        visualize: when true, render each frame and throttle the loop to ``fps``.
        fps: maximum number of frames per second while visualizing.

    Returns:
        ``(total_reward, action_counter)`` -- the summed reward of the episode
        and a ``collections.Counter`` of how often each action was chosen.
    """
    frame_budget = 1 / fps
    state = env.reset()
    total_reward = 0.0
    action_counter = collections.Counter()

    while True:
        start_ts = time.time()
        if visualize:
            env.render()

        # np.asarray converts the observation without the ``copy=False``
        # argument, which raises on NumPy 2.x whenever a copy is unavoidable.
        state_v = torch.tensor(np.asarray(state)).unsqueeze(dim=0)
        # Pure inference: no autograd graph is needed to pick an action.
        with torch.no_grad():
            q_vals = net(state_v.to(device)).cpu().numpy()[0]

        # Plain int keeps the Counter keys and their printed form independent
        # of the NumPy scalar type.
        action = int(np.argmax(q_vals))
        action_counter[action] += 1
        state, reward, done, _ = env.step(action)
        total_reward += reward
        if done:
            break
        if visualize:
            # Sleep away whatever is left of this frame's time budget.
            delta = frame_budget - (time.time() - start_ts)
            if delta > 0:
                time.sleep(delta)

    return total_reward, action_counter


# NOTE: the loop variable deliberately keeps the name `seed`, which rebinds the
# notebook-level `seed` set in the seeding cell, exactly as before.
for seed in EVAL_SEEDS:
    env = gym.make(params['env_name'])
    env = ptan.common.wrappers.wrap_dqn(env)
    #  Record video
#     name = 'pong-1step-double-324267'
#     env = gym.wrappers.Monitor(env, "recording/"+name+"_"+str(seed)+"/", video_callable=lambda episode_id: True,force=True)

    try:
        # Previously `seed` was never used, so the ten iterations were
        # interchangeable and could not be reproduced. Seeding the environment
        # ties each episode to its seed.
        env.seed(seed)

#         net = dqn_model.DQN(env.observation_space.shape,
#                             env.action_space.n).to(device)
        net = dqn_model.DuelingDQN(env.observation_space.shape,
                                   env.action_space.n).to(device)

        # map_location lets a checkpoint saved on a GPU load on a CPU-only
        # machine, matching the CPU fallback in the device-selection cell.
        net.load_state_dict(torch.load(MODEL_FILENAME, map_location=device))
        # Evaluation mode for layers such as dropout or batch norm; this has
        # no effect if the network contains none.
        net.eval()

        for plays in range(PLAYS_PER_SEED):
            total_reward, action_counter = _play_episode(
                env, net, device, visualize=VISUALIZE, fps=FPS)
            print("Total reward: %.2f" % total_reward)
            print("Action counts:", action_counter)
    finally:
        # Always release the emulator and render window, even when the
        # episode is interrupted or loading the checkpoint fails.
        env.close()

Total reward: 4.00
Action counts: Counter({1: 77, 3: 25, 2: 23, 0: 14})
Total reward: 10.00
Action counts: Counter({1: 267, 3: 45, 0: 33, 2: 30})
Total reward: 4.00
Action counts: Counter({1: 116, 3: 15, 0: 12, 2: 11})
Total reward: 4.00
Action counts: Counter({1: 77, 3: 25, 2: 23, 0: 14})
Total reward: 10.00
Action counts: Counter({1: 267, 3: 45, 0: 33, 2: 30})
Total reward: 4.00
Action counts: Counter({1: 77, 3: 25, 2: 23, 0: 14})
Total reward: 4.00
Action counts: Counter({1: 77, 3: 25, 2: 23, 0: 14})
Total reward: 4.00
Action counts: Counter({1: 116, 3: 15, 0: 12, 2: 11})
Total reward: 4.00
Action counts: Counter({1: 77, 3: 25, 2: 23, 0: 14})
Total reward: 4.00
Action counts: Counter({1: 116, 3: 15, 0: 12, 2: 11})


In [None]:
# Safety-net cleanup: close whatever environment `env` currently refers to,
# e.g. after the playback cell was interrupted. Requires that the playback
# cell has been run at least once so that `env` exists.
env.close()

In [None]:
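# Disabled BeamRider variant of the playback cell above, kept for reference only
# (every line in this cell is commented out).
# seed = 189762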
# params = common.HYPERPARAMS['beamrider-v1']

# name = 'beamrider_srg'
# # [324267, 250271, 189762, 542357, 519293, 516463, 353568, 110577, 953419, 405991]
# env = gym.make(params['env_name'])
# env = ptan.common.wrappers.wrap_dqn(env)
# #  Record video
# env = gym.wrappers.Monitor(env, "recording/"+name+"_"+str(seed)+"/", video_callable=lambda episode_id: True,force=True)

# net = dqn_model.DQN(env.observation_space.shape, 
#                     env.action_space.n).to(device)

# MODEL_FILENAME = './models/beamrider-v0-basic-srg-953419.pt'
# # MODEL_FILENAME = './models/pong/pong-nsteps_3/pong-nsteps_3_110577.pt'
# # MODEL_FILENAME = './models/pong/pong-nsteps_4/pong-nsteps_4_110577.pt'

# net.load_state_dict(torch.load(MODEL_FILENAME))

# VISUALIZE = True
# FPS = 200

# for plays in range(1):
#     state = env.reset()
#     total_reward = 0.0
#     action_counter = collections.Counter()

#     while True:
#         start_ts = time.time()
#         if VISUALIZE:
#             env.render()

#         state_v = torch.tensor(np.array(state, copy=False)).unsqueeze(dim=0)
#         q_vals = net(state_v.to(device)).data.cpu().numpy()[0]

#         action = np.argmax(q_vals)
#         action_counter[action] += 1
#         state, reward, done, _ = env.step(action)
#         total_reward += reward
#         if done:
#             break
#         if VISUALIZE:
#             delta = 1/FPS - (time.time() - start_ts)
#             if delta > 0:
#                 time.sleep(delta)
#     print("Total reward: %.2f" % total_reward)
#     print("Action counts:", action_counter)
# env.close()