### Setup

conda create --prefix=D:/L6_SWD/Dissertation/Project/CondaEnvs/diss_test python=3.8

conda activate D:/L6_SWD/Dissertation/Project/CondaEnvs/diss_test

conda install ipykernel

#### Pip installs

pip install gymnasium[atari]
pip install gymnasium[accept-rom-license]


pip install stable-baselines3
pip install ale-py==0.7.4
pip install opencv-python

pip install tensorflow

### Import

In [None]:
import gymnasium as gym
from stable_baselines3 import DQN
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env

### Create Env and Test with a random agent

In [None]:
# Gymnasium environment id shared by CreateEnv() and CreateVectorEnvs().
environment_name = "ALE/SpaceInvaders-v5"

def CreateEnv():
    """Return one ``environment_name`` environment created with "human" rendering."""
    # render_mode can be changed to None
    return gym.make(environment_name, render_mode="human")

def CreateVectorEnvs():
    """Return a vector of four ``environment_name`` environments with "human" rendering."""
    # render_mode can be changed to None
    vector_options = {"render_mode": "human", "num_envs": 4}
    return gym.vector.make(environment_name, **vector_options)

In [None]:
def TestEnv(env):
    """Play five episodes on ``env`` with randomly sampled actions.

    Each episode resets the environment, repeatedly renders it, samples an
    action from ``env.action_space`` and steps the environment, summing the
    rewards. The episode number and its total score are printed when the
    episode ends.

    An episode ends as soon as ``env.step`` reports ``terminated`` *or*
    ``truncated``. Checking only ``terminated`` would keep stepping an
    environment whose episode was cut short (e.g. by a step limit).

    Args:
        env: Object exposing ``reset()``, ``render()``, ``close()``,
            ``action_space.sample()`` and ``step(action)`` returning the
            5-tuple ``(observation, reward, terminated, truncated, info)``.

    Returns:
        None. A ``KeyboardInterrupt`` raised while playing is swallowed after
        the environment has been reset and closed.
    """
    episodes = 5
    try:
        for episode in range(1, episodes + 1):
            env.reset()
            score = 0
            terminated = False
            truncated = False

            # Stop on either end-of-episode signal, not just `terminated`.
            while not (terminated or truncated):
                env.render()
                action = env.action_space.sample()

                _, reward, terminated, truncated, _ = env.step(action)
                score += reward

            print('Episode:{} Score:{}'.format(episode, score))

    except KeyboardInterrupt:
        # Manual interruption: leave the environment reset and closed.
        env.reset()
        env.close()

In [None]:
# Build the single environment via CreateEnv(); the bare expression on the
# last line is there so the notebook shows the action space as cell output.
env = CreateEnv()
env.action_space

In [None]:
#TestEnv(env)

### Create RL Models And Train

#### Logging

In [None]:
# Directory handed to DQN(tensorboard_log=...) when the model is created.
logPath = "./Training/Logs/SpaceInvaders/DQN"

#### Create Model

In [None]:
# Two SpaceInvaders environments built with make_atari_env, seeded with 0.
# NOTE(review): this rebinds `env`; the environment created earlier with
# CreateEnv() is not closed before being replaced.
env = make_atari_env('ALE/SpaceInvaders-v5', n_envs=2, seed=0)

In [None]:
# Wrap the vectorised env in VecFrameStack with n_stack=2
# (presumably each observation then holds the 2 most recent frames — confirm).
env = VecFrameStack(env,n_stack=2)

In [None]:
# DQN agent using "CnnPolicy" on the frame-stacked env, logging to logPath.
# NOTE(review): buffer_size=400000 image observations may need a lot of RAM —
# confirm it fits on the training machine.
model = DQN("CnnPolicy",env,verbose=1,tensorboard_log=logPath,buffer_size=400000)

#### Train Model

In [None]:
# Train the agent for 1,000,000 timesteps.
model.learn(total_timesteps=1000000)

#### Save Model

In [1]:
# Common path prefix for saved artefacts; the save cells append
# "-Model", "-Policy" and "-ReplayBuffer" to it.
savePath = "./SavedModels/SpaceInvaders/DQN/DQN1000000_2"

In [None]:
# Save the whole model under the "-Model" suffix.
model.save(savePath+"-Model")

In [None]:
# Save only the policy object, separately from the full model.
policy = model.policy
policy.save(savePath+"-Policy")

In [None]:
# Save the replay buffer under the "-ReplayBuffer" suffix.
model.save_replay_buffer(savePath+"-ReplayBuffer")

#### Load Model

In [None]:
# Rebuild the environment with the same settings used for training
# (2 envs, seed 0, n_stack=2) so the loaded model can be attached to it.
env = make_atari_env('ALE/SpaceInvaders-v5', n_envs=2, seed=0)
env = VecFrameStack(env, n_stack=2)

In [None]:
# Load the file written by model.save(savePath+"-Model"); nothing is ever
# saved at the bare savePath, so the "-Model" suffix is required here.
model = DQN.load(savePath+"-Model", env)

#### Test Model

In [None]:
# Evaluate the model over 10 episodes with rendering requested; the call's
# result is left as the cell output.
evaluate_policy(model, env, n_eval_episodes=10, render=True)

In [None]:
# Let the trained model play: predict an action from the current observation,
# step the vectorised env, render, repeat.
# The loop has no exit condition; it runs until the cell is interrupted.
# NOTE(review): whether env.render() actually shows a window depends on the
# render mode the env was created with — confirm against the SB3 VecEnv docs.
obs = env.reset()
while True:
    action, _states = model.predict(obs)
    obs, rewards, dones, info = env.step(action)
    env.render()

In [None]:
# Close the environment once testing is finished.
env.close()

#### Logs

    tensorboard --logdir E:/L6_SWD/Dissertation/Project/TestCode/Training/Logs/SpaceInvaders/DQN/DQN_2