Importing Dependencies

In [1]:
import gym
from stable_baselines3 import A2C 
from stable_baselines3.common.vec_env import VecFrameStack 
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env # importing atari environment
import os

Test Environment

In [2]:
!python -m atari_py.import_roms ./ROMS

copying adventure.bin from ./ROMS\Adventure (1980) (Atari, Warren Robinett) (CX2613, CX2613P) (PAL).bin to C:\anaconda3\envs\Pyton38\lib\site-packages\atari_py\atari_roms\adventure.bin
copying air_raid.bin from ./ROMS\Air Raid (Men-A-Vision) (PAL) ~.bin to C:\anaconda3\envs\Pyton38\lib\site-packages\atari_py\atari_roms\air_raid.bin
copying alien.bin from ./ROMS\Alien (1982) (20th Century Fox Video Games, Douglas 'Dallas North' Neubauer) (11006) ~.bin to C:\anaconda3\envs\Pyton38\lib\site-packages\atari_py\atari_roms\alien.bin
copying amidar.bin from ./ROMS\Amidar (1982) (Parker Brothers, Ed Temple) (PB5310) ~.bin to C:\anaconda3\envs\Pyton38\lib\site-packages\atari_py\atari_roms\amidar.bin
copying assault.bin from ./ROMS\Assault (AKA Sky Alien) (1983) (Bomb - Onbase) (CA281).bin to C:\anaconda3\envs\Pyton38\lib\site-packages\atari_py\atari_roms\assault.bin
copying asterix.bin from ./ROMS\Asterix (AKA Taz) (1984) (Atari, Jerome Domurat, Steve Woita) (CX2696).bin to C:\anaconda3\envs\Pyt

In [3]:
environment_name = "Breakout-v0"

In [4]:
env = gym.make('Breakout-v0')

In [5]:
episodes = 5
for episode in range(1, episodes+1):
    state = env.reset()
    done = False
    score = 0 
    while not done:
        env.render(mode='rgb_array')
        action = env.action_space.sample()
        n_state, reward, done, info = env.step(action)
        score+=reward
    print('Episode:{} Score:{}'.format(episode, score))
env.close()

Episode:1 Score:0.0
Episode:2 Score:2.0
Episode:3 Score:2.0
Episode:4 Score:2.0
Episode:5 Score:0.0


In [6]:
env.action_space.sample()

2

In [7]:
env.observation_space.sample()

array([[[ 22,  35, 250],
        [ 41,  55,  19],
        [ 86, 205, 181],
        ...,
        [ 17,  29, 132],
        [218, 103, 188],
        [215, 125,  77]],

       [[153, 138, 155],
        [118, 148,   7],
        [109, 196, 248],
        ...,
        [133,  17, 207],
        [ 74, 131, 251],
        [143, 242, 128]],

       [[ 58, 196, 104],
        [  0, 198,  16],
        [ 17, 171, 160],
        ...,
        [ 61, 165, 135],
        [ 55,  71, 191],
        [134,  40,  57]],

       ...,

       [[149,  44,  21],
        [182, 203, 246],
        [133, 242, 241],
        ...,
        [236, 205, 150],
        [222,  41, 170],
        [ 75, 178, 113]],

       [[234, 203, 101],
        [206, 123, 243],
        [ 69, 172, 142],
        ...,
        [ 13, 132,  49],
        [162,  62, 213],
        [169, 140, 156]],

       [[161, 174, 219],
        [ 87, 195, 166],
        [189,  40, 172],
        ...,
        [ 38, 202, 223],
        [ 81, 154, 175],
        [100,  99,  85]]

In [8]:
env = make_atari_env('Breakout-v0', n_envs=4, seed=0)

In [9]:
env = VecFrameStack(env, n_stack=4)

In [10]:
log_path = os.path.join('Training', 'Logs')

In [11]:
model = A2C("CnnPolicy", env, verbose=1, tensorboard_log=log_path)
model.learn(total_timesteps=20000)

Using cpu device
Wrapping the env in a VecTransposeImage.
Logging to Training\Logs\A2C_1
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 275      |
|    ep_rew_mean        | 1.46     |
| time/                 |          |
|    fps                | 148      |
|    iterations         | 100      |
|    time_elapsed       | 13       |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -1.39    |
|    explained_variance | 0.0669   |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | 0.0399   |
|    value_loss         | 0.0542   |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 270      |
|    ep_rew_mean        | 1.38     |
| time/                 |          |
|    fps                | 138      |
|    iterations         | 200      |
|    time_elapsed      

<stable_baselines3.a2c.a2c.A2C at 0x212b1205520>

In [12]:
a2c_path = os.path.join('Training', 'Saved Models', 'A2C_model')

In [13]:
model.save(a2c_path)

In [14]:
del model

In [15]:
#env = make_atari_env('Breakout-v0', n_envs=1, seed=0)
#env = VecFrameStack(env, n_stack=4)
env = make_atari_env('Breakout-v0', n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=4)

In [16]:
model = A2C.load(a2c_path)

In [17]:
evaluate_policy(model, env, n_eval_episodes=10, render=True)



(5.0, 1.5491933384829668)

In [18]:
env.close()

In [29]:
obs = env.reset()
val = True
itr = 0
while (val==True):
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()
    itr = itr + 1
    if(itr==10):
        val = False

In [30]:
env.close()