In [1]:
import gymnasium as gym
from gymnasium import spaces
from Agent import NeuralAgent, plot_return
import matplotlib.pyplot as plt

In [2]:
# wrapper helper
class ReduceActionSpace(gym.Wrapper):
    """Gymnasium wrapper that exposes only a chosen subset of the wrapped env's actions.

    The agent sees a ``Discrete(len(actions))`` action space; each reduced action
    index ``i`` is translated to ``actions[i]`` before being passed to the wrapped
    environment.

    Args:
        env: The environment to wrap.
        actions: Indexable collection whose i-th entry is the action handed to the
            wrapped environment when the agent picks reduced action ``i``.
    """

    def __init__(self, env, actions):
        super().__init__(env)
        # The agent-facing action space has one index per entry of `actions`.
        self.action_space = spaces.Discrete(len(actions))
        # Lookup table: reduced action index -> action for the wrapped environment.
        self.action_map = actions

    def step(self, action):
        """Translate a reduced action index and step the wrapped environment.

        Args:
            action: Index into ``self.action_map``.

        Returns:
            Whatever the wrapped environment's ``step`` returns, unmodified.
        """
        original_action = self.action_map[action]
        return self.env.step(original_action)

In [3]:
# Demo cell: run a cognition-enabled agent with exploration_rate_decay = 0.0 so that the
# "demo" functions in Cognitive_Hypotheses.py (if they are uncommented there) can display
# their frames and info while agent.simulate runs.
# NOTE(review): the original comment says this sets the exploration rate to 0; presumably a
# decay factor of 0.0 achieves that inside NeuralAgent -- confirm in Agent.py.
do_show_demo_functions = False
if do_show_demo_functions:
    ###################################################################################################
    # Initialize environment and extract all relevant components

    train_env = gym.make("ALE/Berzerk-v5", mode=1, repeat_action_probability=0, obs_type="ram", render_mode="rgb_array")
    # Everything that runs while train_env is open lives inside try/finally so the
    # environment is closed even if setup or training raises or is interrupted
    # (e.g. a KeyboardInterrupt during a long simulate call).
    try:
        # Only these original action ids are exposed to the agent (as indices 0..4).
        CUSTOM_ACTION_SPACE = [1, 2, 3, 4, 5]
        train_env = ReduceActionSpace(train_env, CUSTOM_ACTION_SPACE)

        # assuming discrete action space and 1-dimensional observation space
        ACTION_SPACE_SIZE = train_env.action_space.n
        print(f"Action space size: {ACTION_SPACE_SIZE}")
        OBSERVATION_SPACE_SIZE = train_env.observation_space.shape[0]
        print(f"Observation space size: {OBSERVATION_SPACE_SIZE}")

        #################################################################################################
        # Initialize agent

        folder_to_save_checkpoints = "./berzerk_show_demo"
        gamma = 0.99
        lr = .000001
        max_storage_size = 65000
        batch_size = 64
        exploration_rate_decay = 0.0
        use_cognition = True
        agent = NeuralAgent(OBSERVATION_SPACE_SIZE, ACTION_SPACE_SIZE, folder_to_save_checkpoints, gamma, lr, max_storage_size, batch_size, exploration_rate_decay, use_cognition)

        #################################################################################################
        # Load model into agent if desired

        load_agent = False
        load_file = ""

        if load_agent:
            agent.load(load_file)

        ########################################################################################################
        # Train the agent if desired. Will display metrics per batch of episodes, as well as a plot of returns for each episode when training is done

        perform_training = True

        if perform_training:
            print(f"Device: {agent.device}\nLearning Rate: {agent.lr}\nBatch Size: {agent.batch_size}\nExploration Rate Decay: {agent.exploration_rate_decay}\nMaximum Memory Size: {agent.max_storage_size}")
            np_filename = agent.simulate(train_env, 1000)
            plot_return(np_filename)
    finally:
        # Always release the training environment, on success and on failure alike.
        train_env.close()

    ########################################################################################################
    # Render the agent configured above (use_cognition = True here, so not a vanilla RL agent)
    # in a render_mode="human" environment to see how well it plays the game.
    # NOTE(review): env_human is not closed in this cell; confirm whether
    # render_agent_game closes it, otherwise the environment may stay open.

    do_render = False

    if do_render:
        env_human = gym.make("ALE/Berzerk-v5", mode=1, repeat_action_probability=0, obs_type="ram", render_mode="human")
        env_human = ReduceActionSpace(env_human, CUSTOM_ACTION_SPACE)
        agent.render_agent_game(env_human, 2000)

In [4]:
# Untrained cell: build a fresh cognition-enabled agent (no checkpoint loaded, no training)
# and render it, to show the cognitive portion in action with a fully untrained agent.
do_show_untrained = False
if do_show_untrained:
    ###################################################################################################
    # Initialize environment and extract all relevant components

    train_env = gym.make("ALE/Berzerk-v5", mode=1, repeat_action_probability=0, obs_type="ram", render_mode="rgb_array")
    # Everything that runs while train_env is open lives inside try/finally so the
    # environment is closed even if setup or training raises or is interrupted
    # (e.g. a KeyboardInterrupt during a long simulate call).
    try:
        # Only these original action ids are exposed to the agent (as indices 0..4).
        CUSTOM_ACTION_SPACE = [1, 2, 3, 4, 5]
        train_env = ReduceActionSpace(train_env, CUSTOM_ACTION_SPACE)

        # assuming discrete action space and 1-dimensional observation space
        ACTION_SPACE_SIZE = train_env.action_space.n
        print(f"Action space size: {ACTION_SPACE_SIZE}")
        OBSERVATION_SPACE_SIZE = train_env.observation_space.shape[0]
        print(f"Observation space size: {OBSERVATION_SPACE_SIZE}")

        #################################################################################################
        # Initialize agent

        folder_to_save_checkpoints = "./berzerk_show_untrained"
        gamma = 0.99
        lr = .000001
        max_storage_size = 65000
        batch_size = 64
        exploration_rate_decay = 0.5
        use_cognition = True
        agent = NeuralAgent(OBSERVATION_SPACE_SIZE, ACTION_SPACE_SIZE, folder_to_save_checkpoints, gamma, lr, max_storage_size, batch_size, exploration_rate_decay, use_cognition)

        #################################################################################################
        # Load model into agent if desired

        load_agent = False
        load_file = ""

        if load_agent:
            agent.load(load_file)

        ########################################################################################################
        # Train the agent if desired. Will display metrics per batch of episodes, as well as a plot of returns for each episode when training is done

        perform_training = False

        if perform_training:
            print(f"Device: {agent.device}\nLearning Rate: {agent.lr}\nBatch Size: {agent.batch_size}\nExploration Rate Decay: {agent.exploration_rate_decay}\nMaximum Memory Size: {agent.max_storage_size}")
            np_filename = agent.simulate(train_env, 1000)
            plot_return(np_filename)
    finally:
        # Always release the training environment, on success and on failure alike.
        train_env.close()

    ########################################################################################################
    # Render the agent configured above (use_cognition = True here, so not a vanilla RL agent)
    # in a render_mode="human" environment to see how well it plays the game.
    # NOTE(review): env_human is not closed in this cell; confirm whether
    # render_agent_game closes it, otherwise the environment may stay open.

    do_render = True

    if do_render:
        env_human = gym.make("ALE/Berzerk-v5", mode=1, repeat_action_probability=0, obs_type="ram", render_mode="human")
        env_human = ReduceActionSpace(env_human, CUSTOM_ACTION_SPACE)
        agent.render_agent_game(env_human, 2000)

In [None]:
# Main cell: train and/or run the cognition-enhanced agent. As configured it loads a saved
# checkpoint, skips training (perform_training = False) and renders the agent playing.
do_berzerk_cognition = True
if do_berzerk_cognition:
    ###################################################################################################
    # Initialize environment and extract all relevant components

    train_env = gym.make("ALE/Berzerk-v5", mode=1, repeat_action_probability=0, obs_type="ram", render_mode="rgb_array")
    # Everything that runs while train_env is open lives inside try/finally so the
    # environment is closed even if setup, checkpoint loading or training raises or is
    # interrupted (e.g. a KeyboardInterrupt during a long simulate call).
    try:
        # Only these original action ids are exposed to the agent (as indices 0..4).
        CUSTOM_ACTION_SPACE = [1, 2, 3, 4, 5]
        train_env = ReduceActionSpace(train_env, CUSTOM_ACTION_SPACE)

        # assuming discrete action space and 1-dimensional observation space
        ACTION_SPACE_SIZE = train_env.action_space.n
        print(f"Action space size: {ACTION_SPACE_SIZE}")
        OBSERVATION_SPACE_SIZE = train_env.observation_space.shape[0]
        print(f"Observation space size: {OBSERVATION_SPACE_SIZE}")

        #################################################################################################
        # Initialize agent

        folder_to_save_checkpoints = "./berzerk_COGNITION_V4"
        gamma = 0.75
        lr = .00025
        max_storage_size = 85000
        batch_size = 64
        exploration_rate_decay = .99999
        use_cognition = True
        agent = NeuralAgent(OBSERVATION_SPACE_SIZE, ACTION_SPACE_SIZE, folder_to_save_checkpoints, gamma, lr, max_storage_size, batch_size, exploration_rate_decay, use_cognition)

        #################################################################################################
        # Load model into agent if desired

        load_agent = True
        # Checkpoint path is relative to the notebook's working directory.
        load_file = "./berzerk_COGNITION_V4/05-04_12-18-53_neural_agent_35.pth"

        if load_agent:
            agent.load(load_file)

        ########################################################################################################
        # Train the agent if desired. Will display metrics per batch of episodes, as well as a plot of returns for each episode when training is done

        perform_training = False

        if perform_training:
            print(f"Device: {agent.device}\nLearning Rate: {agent.lr}\nBatch Size: {agent.batch_size}\nExploration Rate Decay: {agent.exploration_rate_decay}\nMaximum Memory Size: {agent.max_storage_size}")
            np_filename = agent.simulate(train_env, 3000)
            plot_return(np_filename)
    finally:
        # Always release the training environment, on success and on failure alike.
        train_env.close()

    ########################################################################################################
    # Render the agent configured above (use_cognition = True here, so not a vanilla RL agent)
    # in a render_mode="human" environment to see how well it plays the game.
    # NOTE(review): env_human is not closed in this cell; confirm whether
    # render_agent_game closes it, otherwise the environment may stay open.

    do_render = True

    if do_render:
        env_human = gym.make("ALE/Berzerk-v5", mode=1, repeat_action_probability=0, obs_type="ram", render_mode="human")
        env_human = ReduceActionSpace(env_human, CUSTOM_ACTION_SPACE)
        agent.render_agent_game(env_human, 2000)

In [None]:
# try the model in the middle that seemed to do well