# Atari Breakout: training an agent to achieve the best possible score

https://www.gymlibrary.dev/environments/atari/breakout/

In [None]:
import gymnasium as gym 
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack
from stable_baselines3.common.vec_env import DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
from stable_baselines3.common.env_util import make_atari_env
import os
import time

In [None]:
import torch

# Report whether PyTorch can see a CUDA device, and which one it would use.
cuda_ok = torch.cuda.is_available()
print(f"CUDA available: {cuda_ok}")
if cuda_ok:
    print(f"Current device: {torch.cuda.current_device()}")
    print(f"Device name: {torch.cuda.get_device_name()}")
else:
    print("Current device: CPU")

In [None]:
# Create a single Breakout environment so its spaces can be inspected below.
# render_mode="human" asks the environment to display the game on screen.
env = gym.make("ALE/Breakout-v5", render_mode="human")

In [None]:
# Sanity check: reset the environment; the cell output shows what reset() returned.
env.reset()

In [None]:
# Inspect the action space, i.e. the set of actions the agent can choose from.
env.action_space

In [None]:
# Inspect the observation space, i.e. the format of what the agent observes.
env.observation_space

In [None]:
# Inspection is done; close this environment before new ones are created below.
env.close()

## Begin: play a few episodes with random actions

In [None]:
# Baseline: play a few episodes with uniformly random actions and print the
# score of each one, to have a reference point for the trained agent.
env = gym.make("ALE/Breakout-v5", render_mode="human", obs_type="rgb", frameskip=4)
episodes = 5

try:
    for episode in range(1, episodes + 1):
        # Gymnasium's reset() returns an (observation, info) pair.
        obs, info = env.reset()
        done = False
        score = 0

        while not done:
            # No explicit env.render() call: with render_mode="human" the
            # environment is displayed automatically on every step.
            action = env.action_space.sample()
            n_state, reward, terminated, truncated, info = env.step(action)
            score += reward
            # An episode ends when the game is over (terminated) or when it is
            # cut short by a limit (truncated).
            done = terminated or truncated

        print(f"Episode: {episode}, Score: {score}")
finally:
    # Always release the environment (and its window), even if the loop is
    # interrupted or raises.
    env.close()

## Vectorize the environment and train (this lets us train on several environments at once)

In [None]:
# Build 4 Breakout environments with SB3's Atari helper and stack the last 4
# frames of each one, so a single observation also carries motion information.
# NOTE(review): SB3 documents AtariWrapper for "*NoFrameskip-v4" ids, while
# "ALE/Breakout-v5" already skips frames on its own — confirm the two frame
# skips are not compounding. Any change here must be mirrored in the
# evaluation environment further down.
env = VecFrameStack(
    make_atari_env('ALE/Breakout-v5', n_envs=4, seed=0),
    n_stack=4,
)

In [None]:
# Reset the vectorized environment; the cell output shows what reset() returned.
env.reset()

In [None]:
# Render the current state of the vectorized environment.
# NOTE(review): what (if anything) is displayed depends on the render mode the
# environments were created with — confirm this cell shows what is intended.
env.render()

In [None]:
# NOTE(review): this closes the same `env` that the model is created with and
# trained on in the cells below — confirm the environment is still usable for
# training after close(), or drop this cell.
env.close()

In [None]:
# Directory passed to the model as its TensorBoard log location.
log_dir_parts = ('Training', 'Logs')
log_path = os.path.join(*log_dir_parts)

In [None]:
# A2C with a convolutional policy ("CnnPolicy"), since the observations are images.
# Training metrics are written for TensorBoard under log_path.
# A fixed seed (the same value used when building the environments) keeps the
# stochastic training run repeatable between notebook executions.
model = A2C("CnnPolicy", env, verbose=1, tensorboard_log=log_path, seed=0)

In [None]:
# Training budget, counted in environment timesteps.
TOTAL_TIMESTEPS = 100_000
model.learn(total_timesteps=TOTAL_TIMESTEPS)

Mean length: 598  
Mean reward: 13.1  
97% explained variance

Very good results!

## Save and load

In [None]:
# Save the trained model under Training/Saved Models/A2C_model.
a2c_path = os.path.join('Training', 'Saved Models', 'A2C_model')
model.save(a2c_path)
# Reload example, left disabled: uncomment the lines below to discard the
# in-memory model and restore it from disk with a fresh single environment.
#del model
#env = make_atari_env('ALE/Breakout-v5', n_envs=1, seed=0)
#env = VecFrameStack(env, n_stack=4)
#model = A2C.load(a2c_path, env)

## Evaluate and test

In [None]:
# Evaluate the trained model on a single environment, built with the same
# frame stacking as during training so the observations have the same format.
env = make_atari_env('ALE/Breakout-v5', n_envs=1, seed=0)
env = VecFrameStack(env, n_stack=4)
try:
    # By default evaluate_policy returns (mean reward, std of reward) over the episodes.
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10, render=True)
finally:
    # Release the evaluation environment even if evaluation is interrupted.
    env.close()

# Display the evaluation result as the cell output.
mean_reward, std_reward

Average score of 15.3 over 10 evaluation episodes. Great results!  
Standard deviation of the reward: 2.14
