# In [None]:
import ale_py
import numpy as np
import pygame
import matplotlib.pyplot as plt

# Bring up every pygame module before any window is created
pygame.init()

# Location of the Assault ROM on disk
rom_path = r"D:\RL\environments\assault.bin"

# Build the emulator handle and give it the ROM to run
env = ale_py.ALEInterface()
env.loadROM(rom_path)

# Legal actions reported by the emulator.
# NOTE: despite the name, this is the collection of actions itself, not a
# count; its length is what sizes the Q-table below.
num_actions = env.getLegalActionSet()

# --- Q-learning hyperparameters ---
alpha = 0.2            # step size applied to each temporal-difference update
gamma = 0.96           # weight given to the bootstrapped future value
epsilon = 1.0          # initial probability of picking a random action
epsilon_min = 0.1      # epsilon is never decayed below this floor
epsilon_decay = 0.995  # multiplicative factor applied to epsilon

# Window size in pixels (width x height) used for the Assault display
SCREEN_WIDTH = 160
SCREEN_HEIGHT = 210

# Open the pygame window and give it a title
screen = pygame.display.set_mode((SCREEN_WIDTH, SCREEN_HEIGHT))
pygame.display.set_caption("Assault Game")

# How many full games to play
epochs = 50

# One Q-value per legal action; the table is not conditioned on any state
q_table = np.zeros(len(num_actions))

# Container reserved for Q-value history, intended for plotting
q_values = list()

# Main loop: play `epochs` games, updating the per-action Q-table after every
# emulator step. The table holds one value per legal action and is not
# conditioned on the observed frame; the frame is only drawn to the window.
stop_requested = False
try:
    for epoch in range(epochs):
        # reset_game() is called for its effect only; nothing is read from it.
        env.reset_game()
        total_reward = 0

        while not env.game_over():
            # Service the window's event queue so the OS does not flag the
            # window as unresponsive; closing the window stops training.
            if any(event.type == pygame.QUIT for event in pygame.event.get()):
                stop_requested = True
                break

            # Get current state and show it in the window.
            # NOTE(review): assumes getScreenRGB() returns a
            # (height, width, 3) array as described in the ALE docs;
            # pygame.surfarray expects (width, height, 3), hence the swap.
            state = env.getScreenRGB()
            screen.blit(pygame.surfarray.make_surface(state.swapaxes(0, 1)), (0, 0))
            pygame.display.flip()

            # Epsilon-greedy policy for action selection. Always pick an
            # *index* into the legal action list so that the Q-table row and
            # the action sent to the emulator refer to the same entry.
            if np.random.rand() < epsilon:
                action_index = np.random.randint(len(num_actions))
            else:
                action_index = int(np.argmax(q_table))
            action = num_actions[action_index]

            # Take action and observe reward
            reward = env.act(action)
            total_reward += reward

            # Update Q-value. Once the game has ended there is no future
            # return left to bootstrap from, so the target is the reward alone.
            next_state_max_q_value = 0.0 if env.game_over() else np.max(q_table)
            q_table[action_index] += alpha * (
                reward + gamma * next_state_max_q_value - q_table[action_index]
            )

            # Decay epsilon after every step, never going below epsilon_min
            epsilon = max(epsilon_min, epsilon * epsilon_decay)

        if stop_requested:
            break

        # Keep a snapshot of the table per finished epoch for later plotting
        q_values.append(q_table.copy())

        print("Epoch:", epoch + 1, "Total Reward:", total_reward)
finally:
    # Always release the pygame window, even if training raised.
    pygame.quit()

# Save Q-table
np.save('q_table.npy', q_table)


# Notebook output residue (not part of the script): from pkg_resources import resource_stream, resource_exists
