In [6]:
import gymnasium as gym
import ale_py
import numpy as np
import time
from stable_baselines3 import DQN	
import matplotlib.pyplot as plt
from collections import deque

# Action masking
from gymnasium import ActionWrapper


In [7]:
class SupermanActionReducer(ActionWrapper):
    """Expose only a subset of the wrapped environment's discrete actions.

    The agent sees ``Discrete(len(allowed_actions))``; a reduced index ``i``
    is translated to ``allowed_actions[i]`` before it reaches the wrapped
    environment.

    Args:
        env: Environment to wrap.
        allowed_actions: Original action ids to keep, in the order they should
            appear in the reduced space. Defaults to the eight movement
            actions (ids 2-9: cardinal and diagonal directions), which
            excludes NOOP, FIRE and every *FIRE combination.

    Raises:
        ValueError: If ``allowed_actions`` is empty, or if the wrapped
            environment has a ``Discrete`` action space that does not contain
            one of the requested ids.
    """

    def __init__(self, env, allowed_actions=None):
        super().__init__(env)

        if allowed_actions is None:
            # Basic movement only: cardinal directions & diagonal directions
            allowed_actions = [2, 3, 4, 5, 6, 7, 8, 9]

        # Take a private copy: the size of `action_space` is fixed below, so
        # the mapping must not change if the caller later mutates its list.
        allowed_actions = list(allowed_actions)

        if not allowed_actions:
            raise ValueError("allowed_actions must contain at least one action")

        # Fail at construction time rather than on the first env.step() when
        # an id does not exist in the wrapped environment.
        base_space = env.action_space
        if isinstance(base_space, gym.spaces.Discrete):
            invalid = [a for a in allowed_actions if not base_space.contains(a)]
            if invalid:
                raise ValueError(
                    f"allowed_actions contains ids not in the wrapped "
                    f"environment's action space: {invalid}"
                )

        self.allowed_actions = allowed_actions
        self.action_space = gym.spaces.Discrete(len(allowed_actions))

    def action(self, action):
        """Map a reduced action index back to the original action id."""
        return self.allowed_actions[action]

In [8]:
# ACTIONS (index: name)
# 0: NOOP
# 1: FIRE

# 2: UP
# 3: RIGHT
# 4: LEFT
# 5: DOWN
# 6: UPRIGHT
# 7: UPLEFT
# 8: DOWNRIGHT
# 9: DOWNLEFT

# 10: UPFIRE
# 11: RIGHTFIRE
# 12: LEFTFIRE
# 13: DOWNFIRE
# 14: UPRIGHTFIRE
# 15: UPLEFTFIRE
# 16: DOWNRIGHTFIRE
# 17: DOWNLEFTFIRE

In [9]:
# Build the Superman environment with rendering disabled.
env = gym.make(
    "ALE/Superman-v5",
    render_mode=None,
)
# Uncomment to restrict the action space via SupermanActionReducer:
# env = SupermanActionReducer(env)

In [18]:
# (name, action id) pairs to probe one at a time. NOOP (0) and plain FIRE (1)
# are not included, so the ids run from 2 through 17 in table order.
test_actions = list(
    zip(
        (
            # cardinal directions
            "UP", "RIGHT", "LEFT", "DOWN",
            # diagonals
            "UPRIGHT", "UPLEFT", "DOWNRIGHT", "DOWNLEFT",
            # direction + FIRE
            "UPFIRE", "RIGHTFIRE", "LEFTFIRE", "DOWNFIRE",
            "UPRIGHTFIRE", "UPLEFTFIRE", "DOWNRIGHTFIRE", "DOWNLEFTFIRE",
        ),
        range(2, 18),
    )
)

In [19]:
# Probe each action in isolation: after a fresh reset, repeat the same action
# for up to 100 steps and report every non-zero reward it produces.
for action_name, action in test_actions:
    obs, _ = env.reset()
    total_reward = 0
    for step in range(100):
        obs, reward, done, truncated, info = env.step(action)
        total_reward += reward
        if reward != 0:
            print(f"Action {action_name} got reward {reward} at step {step}")
        # An episode is over on termination *or* truncation; the environment
        # must be reset before it is stepped again in either case.
        if done or truncated:
            break
    print(f"Action {action_name}: Total reward = {total_reward}")

Action UP: Total reward = 0.0
Action RIGHT: Total reward = 0.0
Action LEFT: Total reward = 0.0
Action DOWN: Total reward = 0.0
Action UPRIGHT: Total reward = 0.0
Action UPLEFT: Total reward = 0.0
Action DOWNRIGHT: Total reward = 0.0
Action DOWNLEFT: Total reward = 0.0
Action UPFIRE: Total reward = 0.0
Action RIGHTFIRE: Total reward = 0.0
Action LEFTFIRE: Total reward = 0.0
Action DOWNFIRE: Total reward = 0.0
Action UPRIGHTFIRE: Total reward = 0.0
Action UPLEFTFIRE: Total reward = 0.0
Action DOWNRIGHTFIRE: Total reward = 0.0
Action DOWNLEFTFIRE: Total reward = 0.0


In [20]:
# Recreate the environment (rendering disabled) and begin a new episode.
env = gym.make(
    "ALE/Superman-v5",
    render_mode=None,
)
obs, _ = env.reset()

# Lookup from action id to its human-readable name; the id is the position of
# the name in this sequence (0 = NOOP ... 17 = DOWNLEFTFIRE).
action_dict = dict(
    enumerate(
        [
            "NOOP", "FIRE",
            "UP", "RIGHT", "LEFT", "DOWN",
            "UPRIGHT", "UPLEFT", "DOWNRIGHT", "DOWNLEFT",
            "UPFIRE", "RIGHTFIRE", "LEFTFIRE", "DOWNFIRE",
            "UPRIGHTFIRE", "UPLEFTFIRE", "DOWNRIGHTFIRE", "DOWNLEFTFIRE",
        ]
    )
)

In [24]:
# Random-policy rollout: take up to 1000 sampled actions and print each step's
# action name, reward and info dict.
# NOTE: this cell does not reset the environment, so re-running it continues
# the current episode from wherever the previous run stopped.
for _ in range(1000):
    action = env.action_space.sample()
    action_name = action_dict[action]
    print(f"Action: {action_name}\n")
    obs, reward, done, truncated, info = env.step(action)

    print(reward)
    # In the recorded output, info holds 'lives', 'episode_frame_number' and
    # 'frame_number'; no 'score' key appears there, so reward is the only
    # scoring signal printed.
    print(info)

    # An episode is over on termination *or* truncation; the environment must
    # be reset before it is stepped again in either case.
    if done or truncated:
        break

Action: UPRIGHTFIRE

0.0
{'lives': 1, 'episode_frame_number': 12004, 'frame_number': 12004}
Action: RIGHT

0.0
{'lives': 1, 'episode_frame_number': 12008, 'frame_number': 12008}
Action: UP

0.0
{'lives': 1, 'episode_frame_number': 12012, 'frame_number': 12012}
Action: LEFTFIRE

0.0
{'lives': 1, 'episode_frame_number': 12016, 'frame_number': 12016}
Action: UPRIGHT

0.0
{'lives': 1, 'episode_frame_number': 12020, 'frame_number': 12020}
Action: UPRIGHTFIRE

0.0
{'lives': 1, 'episode_frame_number': 12024, 'frame_number': 12024}
Action: UPRIGHTFIRE

0.0
{'lives': 1, 'episode_frame_number': 12028, 'frame_number': 12028}
Action: FIRE

0.0
{'lives': 1, 'episode_frame_number': 12032, 'frame_number': 12032}
Action: DOWNRIGHT

0.0
{'lives': 1, 'episode_frame_number': 12036, 'frame_number': 12036}
Action: DOWNLEFTFIRE

0.0
{'lives': 1, 'episode_frame_number': 12040, 'frame_number': 12040}
Action: LEFT

0.0
{'lives': 1, 'episode_frame_number': 12044, 'frame_number': 12044}
Action: RIGHT

0.0
{'live

In [None]:
# Likely Positive Rewards:
# Points for capturing criminals
# Points for repairing the bridge
# Large reward for completing the game

# Likely Negative Rewards:
# Points deducted for touching kryptonite