### Setup

conda create --prefix=D:/L6_SWD/Dissertation/Project/CondaEnvs/diss_test python=3.8

conda activate diss_test

conda install ipykernel

#### Pip installs

pip install gymnasium[atari]
pip install gymnasium[accept-rom-license]


pip install stable-baselines3
pip install ale-py==0.7.4
pip install opencv-python

pip install tensorflow

### Import

In [1]:
import gymnasium as gym
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env

### Create Env and Test with a random agent

In [2]:
environment_name = "ALE/BankHeist-v5"

#note: FOCUS ON BANK HEIST (SHOOTER GAME) AND one of these : amidar, alien, ms_pacman

def CreateEnv():   
    env = gym.make(environment_name,render_mode="human")
    return env

def CreateVectorEnvs():
    envs = gym.vector.make(environment_name,render_mode="human",num_envs=4)
    return envs

In [3]:
def TestEnv(env):
    try:
        episodes = 5
        for episode in range(1, episodes+1):
            state = env.reset()
            score = 0
            terminated = False

            while not terminated:
                env.render()
                action = env.action_space.sample()

                observation, reward, terminated, truncated, info = env.step(action)
                score+=reward

            print('Episode:{} Score:{}'.format(episode, score))
    except KeyboardInterrupt:
        env.reset()
        env.close()

In [4]:
env = CreateEnv()
env.action_space

Discrete(18)

In [5]:
TestEnv(env)

  logger.warn(


TypeError: game_over(): incompatible function arguments. The following argument types are supported:
    1. (self: ale_py._ale_py.ALEInterface) -> bool

Invoked with: <ale_py._ale_py.ALEInterface object at 0x000001DA06D4CE30>; kwargs: with_truncation=False

### Create RL Models

#### Logging

In [7]:
logPath = "./Training/Logs/BankHeist/A2C/A2C-100000"

#### Create Model

In [6]:
env = make_atari_env('ALE/BankHeist-v5', n_envs=4, seed=0)

In [10]:
#Wrap environment 
env = VecFrameStack(env,n_stack=4)

In [11]:
model = A2C("CnnPolicy",env,verbose=1,tensorboard_log=logPath)

Using cpu device
Wrapping the env in a VecTransposeImage.


#### Train Model

In [12]:
model.learn(total_timesteps=5000)

Logging to ./Training/Logs/SpaceInvaders/A2C/A2C-5000\A2C_1
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 646      |
|    ep_rew_mean        | 200      |
| time/                 |          |
|    fps                | 377      |
|    iterations         | 100      |
|    time_elapsed       | 5        |
|    total_timesteps    | 2000     |
| train/                |          |
|    entropy_loss       | -0.783   |
|    explained_variance | 0.753    |
|    learning_rate      | 0.0007   |
|    n_updates          | 99       |
|    policy_loss        | -0.125   |
|    value_loss         | 0.543    |
------------------------------------
------------------------------------
| rollout/              |          |
|    ep_len_mean        | 600      |
|    ep_rew_mean        | 235      |
| time/                 |          |
|    fps                | 383      |
|    iterations         | 200      |
|    time_elapsed       | 10       |
|    total_time

<stable_baselines3.a2c.a2c.A2C at 0x1a743a04af0>

#### Save Model

In [13]:
savePath = "./SavedModels/SpaceInvaders/A2C/A2C-5000"

In [14]:
model.save(savePath+"-Model")

In [15]:
policy = model.policy
policy.save(savePath+"-Policy")


#### Load Model

In [19]:
env = make_atari_env('ALE/SpaceInvaders-v5', n_envs=4, seed=0)
env = VecFrameStack(env, n_stack=4)

In [20]:
model = A2C.load(savePath+"-Model", env)

Wrapping the env in a VecTransposeImage.


#### Test Model

In [21]:
evaluate_policy(model, env, n_eval_episodes=10, render=True)

(263.5, 19.5)

In [None]:
obs = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()

In [23]:
env.close()

### Logs

tensorboard --logdir E:/L6_SWD/Dissertation/Project/TestCode/Training/Logs/SpaceInvaders/A2C/A2C_1-1000000