<a href="https://colab.research.google.com/github/pj0620/google-colab-notebooks/blob/main/Minesweeper_rl.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np
import random
from scipy.signal import convolve2d
%pip install stable-baselines3[extra]

import numpy as np
import gymnasium as gym
from gymnasium import spaces



In [None]:
# Default configuration for the demo board built further down in this cell.
BOARD_SIZE = 10   # side length; the demo arrays are BOARD_SIZE x BOARD_SIZE
TOTAL_BOMBS = 10  # intended number of bombs on the demo board

# 3x3 array of ones. get_values() builds an identical kernel locally, so this
# module-level copy is only a convenience for interactive use.
KERNAL = np.ones(shape=(3, 3))

def get_bombs(board_size, total_bombs):
  """Return a square board with bombs placed on distinct random cells.

  Args:
    board_size: Side length of the board.
    total_bombs: Number of bombs to place. Values below zero are treated as
      zero, i.e. an empty board is returned.

  Returns:
    A float array of shape (board_size, board_size) holding 1 where a bomb
    sits and 0 everywhere else.

  Raises:
    ValueError: If total_bombs exceeds the number of cells. Without this
      check the placement could never finish.
  """
  cell_count = board_size * board_size
  if total_bombs > cell_count:
    raise ValueError(
        f"cannot place {total_bombs} bombs on a board with {cell_count} cells")

  bombs = np.zeros(shape=(board_size, board_size))
  # Draw distinct flat cell indices in one go instead of retrying on
  # collisions, so the running time does not depend on how full the board is.
  for flat_index in random.sample(range(cell_count), max(total_bombs, 0)):
    row, col = divmod(flat_index, board_size)
    bombs[row, col] = 1
  return bombs

def get_values(bombs):
  """Sum `bombs` over the 3x3 window centred on every cell.

  The window is all ones, so the centre cell itself is included in the sum.
  `mode='same'` makes the result the same shape as `bombs`.
  """
  window = np.ones((3, 3))
  window_sums = convolve2d(bombs, window, mode='same')
  return window_sums

def get_representation(vals, visible):
  """Return `(vals * visible, visible)`.

  Multiplying by `visible` zeroes every entry of `vals` whose `visible` entry
  is 0; `visible` itself is handed back unchanged as the second element.
  """
  masked_vals = vals * visible
  return masked_vals, visible

# Build a demo board from the module-level defaults. get_bombs() has no
# default arguments, so the size and bomb count are passed explicitly.
bombs = get_bombs(BOARD_SIZE, TOTAL_BOMBS)
visible = np.zeros(shape=(BOARD_SIZE, BOARD_SIZE))
vals = get_values(bombs)

print("# bombs")
print(bombs)
print("# vals")
print(vals)

# bombs
[[0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 1. 0. 0. 0. 0.]
 [0. 0. 0. 0. 1. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 1. 0. 0. 1. 0. 1. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 1. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 1. 0. 0. 0. 1. 0. 0. 0.]
 [1. 0. 0. 0. 0. 0. 0. 0. 0. 0.]]
# vals
[[0. 0. 0. 0. 1. 1. 1. 0. 0. 0.]
 [0. 0. 0. 1. 2. 2. 1. 0. 0. 0.]
 [0. 0. 0. 1. 2. 2. 1. 0. 0. 0.]
 [0. 0. 0. 1. 1. 1. 0. 1. 1. 1.]
 [0. 1. 1. 1. 1. 1. 2. 2. 2. 1.]
 [0. 1. 1. 1. 1. 1. 2. 3. 3. 2.]
 [0. 1. 1. 1. 1. 1. 2. 2. 2. 1.]
 [0. 1. 1. 1. 0. 1. 1. 2. 1. 1.]
 [1. 2. 1. 1. 0. 1. 1. 1. 0. 0.]
 [1. 2. 1. 1. 0. 1. 1. 1. 0. 0.]]


## Custom gymnasium env

In [None]:
class MinesweeperEnvironment(gym.Env):
  """Minesweeper board exposed through the Gymnasium `Env` interface.

  An observation is a pair of (board_size, board_size) integer arrays:
  the per-cell values masked by visibility, and the visibility mask itself.
  An action is a pair of cell coordinates.

  The game rules in `step` are not implemented yet; it currently returns the
  unchanged observation with zero reward.
  """
  # Because of google colab, we cannot implement the GUI ('human' render mode)
  metadata = {"render_modes": ["console"]}

  def __init__(self, board_size=10, total_bombs=10, render_mode="console"):
    """Create the board and declare the observation and action spaces.

    Args:
      board_size: Side length of the square board.
      total_bombs: Number of bombs to hide on the board.
      render_mode: Stored on the instance; `render` is currently a no-op.

    Raises:
      ValueError: If total_bombs exceeds the number of cells.
    """
    super().__init__()
    self.render_mode = render_mode

    # Side length of the square board and number of bombs hidden on it
    self.board_size = board_size
    self.total_bombs = total_bombs

    self.visible = np.zeros((self.board_size, self.board_size))
    self.set_bombs()
    self.set_values()

    # Define observation space: (masked values, visibility mask).
    # NOTE(review): set_values() uses an all-ones kernel, so a bomb cell counts
    # itself and could reach 9 when fully surrounded; high=8 assumes such a
    # cell is never shown - confirm once step() starts revealing cells.
    self.observation_space = spaces.Tuple((
      spaces.Box(low=0, high=8, shape=(self.board_size, self.board_size), dtype=np.int64),
      spaces.Box(low=0, high=1, shape=(self.board_size, self.board_size), dtype=np.int64)
    ))

    # Define action space
    self.action_space = spaces.Tuple((
      spaces.Discrete(self.board_size),  # x coordinate
      spaces.Discrete(self.board_size)   # y coordinate
    ))

    # Make step() usable even before the first reset().
    self.state = self._get_obs()

  def _get_obs(self):
    """Build the (masked values, visibility mask) observation as int64 copies."""
    masked_vals = (self.visible * self.vals).astype(np.int64)
    visibility = self.visible.astype(np.int64)
    return masked_vals, visibility

  def set_bombs(self):
    """Place `total_bombs` bombs on distinct random cells of a fresh board.

    Uses the environment's own generator (`self.np_random`) so that
    `reset(seed=...)` makes the layout reproducible. A negative bomb count is
    treated as zero.

    Raises:
      ValueError: If total_bombs exceeds the number of cells.
    """
    cell_count = self.board_size * self.board_size
    bomb_count = max(self.total_bombs, 0)
    if bomb_count > cell_count:
      raise ValueError(
          f"cannot place {self.total_bombs} bombs on a board with {cell_count} cells")

    self.bombs = np.zeros(shape=(self.board_size, self.board_size))
    # Sampling without replacement yields distinct cells in a single draw.
    chosen = self.np_random.choice(cell_count, size=bomb_count, replace=False)
    rows, cols = np.unravel_index(chosen, self.bombs.shape)
    self.bombs[rows, cols] = 1

  def set_values(self):
    """Store in `self.vals` the sum of `self.bombs` over each cell's 3x3 window."""
    KERNAL = np.ones((3, 3))
    self.vals = convolve2d(self.bombs, KERNAL, mode='same')

  def reset(self, seed=None, options=None):
    """Start a new game with every cell hidden and a fresh bomb layout.

    Args:
      seed: Optional seed forwarded to `gym.Env.reset` to seed `self.np_random`.
      options: Accepted for Gymnasium API compatibility; unused.

    Returns:
      `(observation, info)` where `info` is an empty dict.
    """
    super().reset(seed=seed)

    self.visible = np.zeros((self.board_size, self.board_size))
    self.set_bombs()
    self.set_values()

    # The second element is the visibility mask, not the raw values, so the
    # observation never exposes cells that have not been revealed.
    self.state = self._get_obs()
    return self.state, {}

  def step(self, action):
    """Advance the game by one action (placeholder: the board is not changed).

    Args:
      action: Pair of cell coordinates drawn from `action_space`.

    Returns:
      `(observation, reward, terminated, truncated, info)` as required by
      Gymnasium. Currently the stored observation, 0, False, False and `{}`.
    """
    # TODO: reveal the chosen cell, compute the reward and detect game over.
    x, y = action  # unpacked to validate the action shape; unused for now
    reward = 0
    terminated = False
    truncated = False
    return self.state, reward, terminated, truncated, {}

  def render(self, mode='human'):
    """Visualization hook; intentionally does nothing for now."""
    pass


In [None]:
# check_env is not imported anywhere earlier in the notebook; bring it in from
# Stable-Baselines3, which the first cell installs.
from stable_baselines3.common.env_checker import check_env

env = MinesweeperEnvironment(board_size=10, total_bombs=10)

# Validate that the environment follows the interface Stable-Baselines3 expects.
check_env(env)