In [None]:
import gymnasium as gym
from gymnasium import spaces
from Agent import NeuralAgent, plot_return
import matplotlib.pyplot as plt

In [None]:
# All gymnasium environments follow a common API, which makes each of them fully featured and self-sufficient, and essentially interchangeable when viewed as a black box.

# An environment is created with the module-level gym.make() function and exposes a reset method, a step method, and a render method. In a typical loop, you use gym.make() to create the environment, reset() to set and return the initial state of the environment, then repeat:
    # determine which action the agent should take next (the reinforcement learning component)
    # pass this action into the step() method to advance the environment
        # the render is automatically displayed by step() if render_mode is set to 'human'
# you can reset() and repeat this loop multiple times using the same initialized environment (made with make()).

# every environment also has an action_space attribute and an observation_space attribute. The action_space attribute defines the possible actions that can be taken in this environment, defining the output of our RL network. The observation_space attribute defines the format of the state space (i.e. what the input will look like for our RL network)

In [None]:
# for example, the code in this cell spins up the game we are "learning" in this project, and just takes random actions at each time step:

run_sample = False
if run_sample: # so the sample code does not run
    # Demo of the bare gymnasium loop: make -> reset -> repeated step, with a
    # purely random policy. The window is shown because render_mode="human".
    sample_env = gym.make("ALE/Berzerk-v5", render_mode="human")
    try:
        sample_env.reset()

        for _ in range(1000):
            # random policy: the action is sampled uniformly from the action space
            # (a real agent would choose it from the current observation instead)
            sample_action = sample_env.action_space.sample()
            _, _, sample_terminated, sample_truncated, _ = sample_env.step(sample_action)

            # start a new episode as soon as the current one ends
            if sample_terminated or sample_truncated:
                observation, info = sample_env.reset()
    finally:
        # always release the emulator/window, even if a step raises or the
        # kernel is interrupted while the demo is running
        sample_env.close()

# Vanilla Agent Implementation

In [None]:
# Our main goal is to train an instance of a vanilla RL agent (implementation in Agent.py) to play this game, and evaluate its execution

In [None]:
# wrapper helper
class ReduceActionSpace(gym.Wrapper):
    """Environment wrapper that exposes only a chosen subset of actions.

    The wrapper advertises a ``Discrete(len(actions))`` action space. Every
    action index it receives is translated through ``actions`` into an action
    of the wrapped environment before being forwarded to it.
    """

    def __init__(self, env, actions):
        """Wrap ``env`` so that only ``actions`` can be taken.

        env: the environment to wrap.
        actions: indexable collection of actions of the wrapped environment;
            position ``i`` is the action executed when the agent picks ``i``.
        """
        super().__init__(env)
        # Reduced action space seen by the agent: one index per allowed action
        self.action_space = spaces.Discrete(len(actions))
        # Lookup table: reduced index -> action of the wrapped environment
        self.action_map = actions

    def step(self, action):
        """Translate ``action`` to the wrapped env's action and step with it.

        Returns whatever the wrapped environment's ``step`` returns.
        """
        original_action = self.action_map[action]
        return self.env.step(original_action)

In [None]:
do_lunar = False
if do_lunar:
    # LunarLander experiment: build the training environment, configure a
    # NeuralAgent, optionally load a checkpoint and/or train, then optionally
    # replay the agent in a human-visible window.
    # Both environments are closed in `finally` blocks so that an exception or a
    # kernel interrupt during a long run does not leak the simulator/window.

    ###################################################################################################
    # Initialize environment and extract all relevant components

    train_env = gym.make("LunarLander-v2", render_mode="rgb_array")
    try:
        # assuming discrete action space and 1-dimensional observation space
        ACTION_SPACE_SIZE = train_env.action_space.n
        print(f"Action space size: {ACTION_SPACE_SIZE}")
        OBSERVATION_SPACE_SIZE = train_env.observation_space.shape[0]
        print(f"Observation space size: {OBSERVATION_SPACE_SIZE}")

        #################################################################################################
        # Initialize agent

        folder_to_save_checkpoints = "./lunarVALIDATE_checkpoints"
        # parameter settings for v5
        gamma = 0.99
        lr = .00025
        max_storage_size = 25000
        batch_size = 64
        exploration_rate_decay = 0.99999
        agent = NeuralAgent(OBSERVATION_SPACE_SIZE, ACTION_SPACE_SIZE, folder_to_save_checkpoints, gamma, lr, max_storage_size, batch_size, exploration_rate_decay)

        #################################################################################################
        # Load model into agent if desired

        load_agent = True
        load_file = "./lunarV5_checkpoints/04-21_14-10-27_neural_agent_2.pth"

        if load_agent:
            agent.load(load_file)

        ########################################################################################################
        # Train the agent if desired. Will display metrics per batch of episodes, as well as a plot of returns for each episode when training is done

        perform_training = False

        if perform_training:
            print(f"Device: {agent.device}\nLearning Rate: {agent.lr}\nBatch Size: {agent.batch_size}\nExploration Rate Decay: {agent.exploration_rate_decay}\nMaximum Memory Size: {agent.max_storage_size}")
            # second argument is presumably the number of training episodes - confirm in Agent.py
            np_filename = agent.simulate(train_env, 1000)
            plot_return(np_filename)
    finally:
        # release the training environment whether or not setup/training succeeded
        train_env.close()

    ########################################################################################################
    # Inject the vanilla RL agent into the gymnasium render loop, and visualize (in a human way) how well it plays the game:

    # Lunar V5 performed the best (parameters as above) - solved the game!
        # important part was making the memory size small

    do_render = True

    if do_render:
        env_human = gym.make("LunarLander-v2", render_mode="human")
        try:
            # NOTE(review): meaning of 2000 (step cap?) is defined in Agent.py - confirm
            agent.render_agent_game(env_human, 2000)
        finally:
            # closing is safe even if render_agent_game already closed the env
            env_human.close()

In [None]:
do_pong = False
if do_pong:
    # Pong experiment (RAM observations): build the training environment,
    # configure a NeuralAgent, optionally load a checkpoint and/or train, then
    # optionally replay the agent in a human-visible window.
    # Both environments are closed in `finally` blocks so that an exception or a
    # kernel interrupt during a long run does not leak the emulator/window.

    ###################################################################################################
    # Initialize environment and extract all relevant components

    train_env = gym.make("ALE/Pong-v5", repeat_action_probability=0, obs_type="ram", render_mode="rgb_array")
    try:
        # assuming discrete action space and 1-dimensional observation space
        ACTION_SPACE_SIZE = train_env.action_space.n
        print(f"Action space size: {ACTION_SPACE_SIZE}")
        OBSERVATION_SPACE_SIZE = train_env.observation_space.shape[0]
        print(f"Observation space size: {OBSERVATION_SPACE_SIZE}")

        #################################################################################################
        # Initialize agent

        folder_to_save_checkpoints = "./pongV2_checkpoints"
        gamma = 0.99
        lr = .000075
        max_storage_size = 40000
        batch_size = 64
        exploration_rate_decay = 0.9999994
        agent = NeuralAgent(OBSERVATION_SPACE_SIZE, ACTION_SPACE_SIZE, folder_to_save_checkpoints, gamma, lr, max_storage_size, batch_size, exploration_rate_decay)

        #################################################################################################
        # Load model into agent if desired

        load_agent = True
        load_file = "./pongV2_checkpoints/04-23_01-29-34_neural_agent_19.pth" # 19 was good

        if load_agent:
            agent.load(load_file)

        ########################################################################################################
        # Train the agent if desired. Will display metrics per batch of episodes, as well as a plot of returns for each episode when training is done

        perform_training = False

        if perform_training:
            print(f"Device: {agent.device}\nLearning Rate: {agent.lr}\nBatch Size: {agent.batch_size}\nExploration Rate Decay: {agent.exploration_rate_decay}\nMaximum Memory Size: {agent.max_storage_size}")
            np_filename = agent.simulate(train_env, 6000)  # for pong, one episode goes till the score hits 21 for either side - much longer episodes
            plot_return(np_filename)
    finally:
        # release the training environment whether or not setup/training succeeded
        train_env.close()

    ########################################################################################################
    # Inject the vanilla RL agent into the gymnasium render loop, and visualize (in a human way) how well it plays the game:

    do_render = True

    if do_render:
        env_human = gym.make("ALE/Pong-v5", repeat_action_probability=0, obs_type="ram", render_mode="human")
        try:
            # NOTE(review): meaning of 2000 (step cap?) is defined in Agent.py - confirm
            agent.render_agent_game(env_human, 2000)
        finally:
            # closing is safe even if render_agent_game already closed the env
            env_human.close()

In [None]:
do_beam = False
if do_beam:
    # BeamRider experiment (RAM observations, reduced action set): build the
    # training environment, configure a NeuralAgent, optionally load a
    # checkpoint and/or train, then optionally replay the agent on screen.
    # Both environments are closed in `finally` blocks so that an exception or a
    # kernel interrupt during a long run does not leak the emulator/window.

    ###################################################################################################
    # Initialize environment and extract all relevant components

    train_env = gym.make("ALE/BeamRider-v5", repeat_action_probability=0, obs_type="ram", render_mode="rgb_array")
    try:
        # indices into the original action space that the agent is allowed to use
        # (presumably FIRE / RIGHT / LEFT - verify with env.unwrapped.get_action_meanings())
        CUSTOM_ACTION_SPACE = [1, 3, 4]
        train_env = ReduceActionSpace(train_env, CUSTOM_ACTION_SPACE)

        # assuming discrete action space and 1-dimensional observation space
        ACTION_SPACE_SIZE = train_env.action_space.n
        print(f"Action space size: {ACTION_SPACE_SIZE}")
        OBSERVATION_SPACE_SIZE = train_env.observation_space.shape[0]
        print(f"Observation space size: {OBSERVATION_SPACE_SIZE}")

        #################################################################################################
        # Initialize agent

        folder_to_save_checkpoints = "./beamV2_checkpoints"
        gamma = 0.99
        lr = .00005
        max_storage_size = 65000
        batch_size = 64
        exploration_rate_decay = 0.999995
        agent = NeuralAgent(OBSERVATION_SPACE_SIZE, ACTION_SPACE_SIZE, folder_to_save_checkpoints, gamma, lr, max_storage_size, batch_size, exploration_rate_decay)

        #################################################################################################
        # Load model into agent if desired

        load_agent = False
        load_file = ""

        if load_agent:
            agent.load(load_file)

        ########################################################################################################
        # Train the agent if desired. Will display metrics per batch of episodes, as well as a plot of returns for each episode when training is done

        perform_training = True

        if perform_training:
            print(f"Device: {agent.device}\nLearning Rate: {agent.lr}\nBatch Size: {agent.batch_size}\nExploration Rate Decay: {agent.exploration_rate_decay}\nMaximum Memory Size: {agent.max_storage_size}")
            # second argument is presumably the number of training episodes - confirm in Agent.py
            np_filename = agent.simulate(train_env, 15000)
            plot_return(np_filename)
    finally:
        # release the training environment whether or not setup/training succeeded
        train_env.close()

    ########################################################################################################
    # Inject the vanilla RL agent into the gymnasium render loop, and visualize (in a human way) how well it plays the game:

    do_render = False

    if do_render:
        env_human = gym.make("ALE/BeamRider-v5", repeat_action_probability=0, obs_type="ram", render_mode="human")
        try:
            # the render env must use the same reduced action set the agent was built for
            env_human = ReduceActionSpace(env_human, CUSTOM_ACTION_SPACE)
            # NOTE(review): meaning of 2000 (step cap?) is defined in Agent.py - confirm
            agent.render_agent_game(env_human, 2000)
        finally:
            # closing is safe even if render_agent_game already closed the env
            env_human.close()

In [None]:
do_chicken = True
if do_chicken:
    # Freeway ("chicken") experiment (RAM observations): build the training
    # environment, configure a NeuralAgent, optionally load a checkpoint and/or
    # train, then optionally replay the agent in a human-visible window.
    # Both environments are closed in `finally` blocks so that an exception or a
    # kernel interrupt during a long run does not leak the emulator/window.

    ###################################################################################################
    # Initialize environment and extract all relevant components

    train_env = gym.make("ALE/Freeway-v5", repeat_action_probability=0, obs_type="ram", render_mode="rgb_array")
    try:
        # assuming discrete action space and 1-dimensional observation space
        ACTION_SPACE_SIZE = train_env.action_space.n
        print(f"Action space size: {ACTION_SPACE_SIZE}")
        OBSERVATION_SPACE_SIZE = train_env.observation_space.shape[0]
        print(f"Observation space size: {OBSERVATION_SPACE_SIZE}")

        #################################################################################################
        # Initialize agent

        folder_to_save_checkpoints = "./chickenVVERIFY_checkpoints"
        gamma = 0.99
        lr = .00005
        max_storage_size = 50000
        batch_size = 64
        exploration_rate_decay = 0.9999935
        agent = NeuralAgent(OBSERVATION_SPACE_SIZE, ACTION_SPACE_SIZE, folder_to_save_checkpoints, gamma, lr, max_storage_size, batch_size, exploration_rate_decay)

        #################################################################################################
        # Load model into agent if desired

        load_agent = False
        load_file = "./chickenV2_checkpoints/04-24_08-57-24_neural_agent_34.pth"

        if load_agent:
            agent.load(load_file)

        ########################################################################################################
        # Train the agent if desired. Will display metrics per batch of episodes, as well as a plot of returns for each episode when training is done

        perform_training = True

        if perform_training:
            print(f"Device: {agent.device}\nLearning Rate: {agent.lr}\nBatch Size: {agent.batch_size}\nExploration Rate Decay: {agent.exploration_rate_decay}\nMaximum Memory Size: {agent.max_storage_size}")
            # second argument is presumably the number of training episodes - confirm in Agent.py
            np_filename = agent.simulate(train_env, 1000)
            plot_return(np_filename)
    finally:
        # release the training environment whether or not setup/training succeeded
        train_env.close()

    ########################################################################################################
    # Inject the vanilla RL agent into the gymnasium render loop, and visualize (in a human way) how well it plays the game:

    do_render = False

    if do_render:
        env_human = gym.make("ALE/Freeway-v5", repeat_action_probability=0, obs_type="ram", render_mode="human")
        try:
            # NOTE(review): meaning of 2000 (step cap?) is defined in Agent.py - confirm
            agent.render_agent_game(env_human, 2000)
        finally:
            # closing is safe even if render_agent_game already closed the env
            env_human.close()

In [None]:
do_berzerk = False
if do_berzerk:
    # Berzerk experiment (RAM observations, reduced action set): build the
    # training environment, configure a NeuralAgent, optionally load a
    # checkpoint and/or train, then optionally replay the agent on screen.
    # Both environments are closed in `finally` blocks so that an exception or a
    # kernel interrupt during a long run does not leak the emulator/window.

    ###################################################################################################
    # Initialize environment and extract all relevant components

    train_env = gym.make("ALE/Berzerk-v5", mode=1, repeat_action_probability=0, obs_type="ram", render_mode="rgb_array")
    try:
        # indices into the original action space that the agent is allowed to use
        # (presumably FIRE / UP / RIGHT / LEFT / DOWN - verify with env.unwrapped.get_action_meanings())
        CUSTOM_ACTION_SPACE = [1, 2, 3, 4, 5]
        train_env = ReduceActionSpace(train_env, CUSTOM_ACTION_SPACE)

        # assuming discrete action space and 1-dimensional observation space
        ACTION_SPACE_SIZE = train_env.action_space.n
        print(f"Action space size: {ACTION_SPACE_SIZE}")
        OBSERVATION_SPACE_SIZE = train_env.observation_space.shape[0]
        print(f"Observation space size: {OBSERVATION_SPACE_SIZE}")

        #################################################################################################
        # Initialize agent

        folder_to_save_checkpoints = "./berzerkV4_checkpoints"
        # v3 parameter settings, along w custom actions and 85000 episodes
        gamma = 0.99
        lr = .000001
        max_storage_size = 85000
        batch_size = 64
        exploration_rate_decay = 0.99999975
        agent = NeuralAgent(OBSERVATION_SPACE_SIZE, ACTION_SPACE_SIZE, folder_to_save_checkpoints, gamma, lr, max_storage_size, batch_size, exploration_rate_decay)

        #################################################################################################
        # Load model into agent if desired

        load_agent = True
        load_file = "./berzerkV3_checkpoints/04-22_07-51-24_neural_agent_30.pth"

        if load_agent:
            agent.load(load_file)

        ########################################################################################################
        # Train the agent if desired. Will display metrics per batch of episodes, as well as a plot of returns for each episode when training is done

        perform_training = False

        if perform_training:
            print(f"Device: {agent.device}\nLearning Rate: {agent.lr}\nBatch Size: {agent.batch_size}\nExploration Rate Decay: {agent.exploration_rate_decay}\nMaximum Memory Size: {agent.max_storage_size}")
            np_filename = agent.simulate(train_env, 85000)
            plot_return(np_filename)
    finally:
        # release the training environment whether or not setup/training succeeded
        train_env.close()

    ########################################################################################################
    # Inject the vanilla RL agent into the gymnasium render loop, and visualize (in a human way) how well it plays the game:

    do_render = True

    if do_render:
        env_human = gym.make("ALE/Berzerk-v5", mode=1, repeat_action_probability=0, obs_type="ram", render_mode="human")
        try:
            # the render env must use the same reduced action set the agent was built for
            env_human = ReduceActionSpace(env_human, CUSTOM_ACTION_SPACE)
            # NOTE(review): meaning of 2000 (step cap?) is defined in Agent.py - confirm
            agent.render_agent_game(env_human, 2000)
        finally:
            # closing is safe even if render_agent_game already closed the env
            env_human.close()

In [None]:
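# # code to make sure the "hack" to get the rgb render of the screen when render_mode is "human" works (put in paper): it plots env.render() in "rgb_array" mode next to the frame read directly from the emulator via unwrapped.ale.getScreenRGB(), so the two can be compared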
# 
# sample_env = gym.make("ALE/Berzerk-v5", obs_type="ram", render_mode="rgb_array")
# sample_env.reset()
# 
# for _ in range(13):
#     rgbgt = sample_env.render()
#     rgb = sample_env.unwrapped.ale.getScreenRGB()
# 
#     fig, axes = plt.subplots(1, 2)
#     axes[0].imshow(rgbgt)
#     axes[1].imshow(rgb)
#     axes[0].set_title(f"{rgb.shape[0]}")
#     plt.show()
# 
# 
#     sample_action = sample_env.action_space.sample()  # agent policy that uses the observation and info
#     _, _, sample_terminated, sample_truncated, _ = sample_env.step(sample_action)
# 
#     if sample_terminated or sample_truncated:
#         observation, info = sample_env.reset()
# 
# sample_env.close()