In [None]:
!pip install keras



In [None]:
!pip install tensorflow

In [None]:
pip install gym

In [15]:
import keras
from keras.layers import Dense, Activation ,Conv2D, Flatten,Reshape 
from keras.models import Sequential, load_model
from keras.optimizers import Adam
import numpy as np
# from google.colab import drive
# drive.mount('/content/drive')

In [16]:

class ReplayBuffer(object):
    """Fixed-capacity circular store of (state, action, reward, next state, done) transitions."""

    def __init__(self, max_size, input_shape, n_actions, discrete=False):
        """Pre-allocate zero-filled arrays for ``max_size`` transitions.

        max_size: number of slots; the oldest slot is overwritten once full.
        input_shape: second dimension of the state arrays.
        n_actions: width of each action row.
        discrete: when True, actions are stored as int8 one-hot rows,
            otherwise as float32 rows.
        """
        self.mem_size = max_size
        self.mem_cntr = 0  # total number of transitions ever stored
        self.discrete = discrete

        state_dims = (self.mem_size, input_shape)
        self.state_memory = np.zeros(state_dims)
        self.new_state_memory = np.zeros(state_dims)

        if self.discrete:
            action_dtype = np.int8
        else:
            action_dtype = np.float32
        self.action_memory = np.zeros((self.mem_size, n_actions),
                                      dtype=action_dtype)

        self.reward_memory = np.zeros(self.mem_size)
        self.terminal_memory = np.zeros(self.mem_size, dtype=np.float32)

    def store_transition(self, state, action, reward, state_, done):
        """Write one transition into the next slot, wrapping around when full."""
        slot = self.mem_cntr % self.mem_size

        self.state_memory[slot] = state
        self.new_state_memory[slot] = state_
        self.reward_memory[slot] = reward
        # Stored inverted: 1 means "not terminal", 0 means "terminal".
        self.terminal_memory[slot] = 1 - done

        if not self.discrete:
            self.action_memory[slot] = action
        else:
            # Encode the action index as a one-hot row.
            one_hot = np.zeros(self.action_memory.shape[1])
            one_hot[action] = 1.0
            self.action_memory[slot] = one_hot

        self.mem_cntr += 1

    def sample_buffer(self, batch_size):
        """Draw ``batch_size`` random filled slots (with replacement).

        Returns the tuple (states, actions, rewards, next states, terminal flags).
        """
        filled = min(self.mem_cntr, self.mem_size)
        picks = np.random.choice(filled, batch_size)

        return (self.state_memory[picks],
                self.action_memory[picks],
                self.reward_memory[picks],
                self.new_state_memory[picks],
                self.terminal_memory[picks])

def network(lr, n_actions):
    """Build and compile the Q-network.

    The flat 25-value input is reshaped to a 5x5 single-channel image, passed
    through three 3x3 convolutions (64 filters, 'same' padding, ReLU), then
    flattened into a 64-unit ReLU layer and a linear output layer with one
    value per action.

    lr: learning rate for the Adam optimizer.
    n_actions: number of output units.
    Returns the compiled model (mean-squared-error loss).
    """
    model = Sequential([
                Reshape((5, 5, 1), input_shape=(25,)),
                Conv2D(64, (3,3), activation='relu', padding='same'),
                Conv2D(64, (3,3), activation='relu', padding='same'),
                Conv2D(64, (3,3), activation='relu', padding='same'),

                Flatten(),
                Dense(64, activation='relu'),
                Dense(n_actions, activation='linear')])

    # `lr=` is the deprecated spelling and is rejected by current Keras
    # releases; `learning_rate=` is the supported keyword.
    model.compile(optimizer=Adam(learning_rate=lr), loss='mse')

    return model

class Agent(object):
    """Epsilon-greedy deep Q-learning agent backed by a single Keras network."""

    def __init__(self, alpha, gamma, n_actions, epsilon, batch_size,
                 input_dims, epsilon_dec=0.9985,  epsilon_end=0.01,
                 mem_size=100000, fname='best.h5'):
        """Create the agent, its replay buffer and its Q-network.

        alpha: learning rate handed to ``network``.
        gamma: discount factor applied to the best next-state Q-value.
        n_actions: number of discrete actions.
        epsilon: initial exploration probability.
        batch_size: number of transitions sampled per learning step.
        input_dims: size of a flattened state.
        epsilon_dec: multiplicative epsilon decay applied after each learning step.
        epsilon_end: lower bound for epsilon.
        mem_size: replay buffer capacity.
        fname: file the model is saved to / loaded from.
        """
        self.action_space = [i for i in range(n_actions)]
        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_dec = epsilon_dec
        self.epsilon_min = epsilon_end
        self.batch_size = batch_size
        self.model_file = fname
        self.memory = ReplayBuffer(mem_size, input_dims, n_actions,
                                   discrete=True)
        self.q_eval = network(alpha, n_actions)

    def remember(self, state, action, reward, new_state, done):
        """Store one transition in the replay buffer."""
        self.memory.store_transition(state, action, reward, new_state, done)

    def choose_action(self, state):
        """Pick an action: random with probability epsilon, otherwise the argmax of the network output."""
        state = state[np.newaxis, :]
        rand = np.random.random()
        if rand < self.epsilon:
            action = np.random.choice(self.action_space)
        else:
            # verbose=0 keeps Keras from printing a progress bar per call.
            actions = self.q_eval.predict(state, verbose=0)
            action = np.argmax(actions)

        return action

    def learn(self):
        """Fit the network on one sampled batch, then decay epsilon.

        Does nothing until the buffer has received more than ``batch_size``
        transitions.
        """
        if self.memory.mem_cntr <= self.batch_size:
            return

        state, action, reward, new_state, done = \
            self.memory.sample_buffer(self.batch_size)

        # Actions are stored one-hot; the dot product recovers their indices.
        action_values = np.array(self.action_space, dtype=np.int8)
        action_indices = np.dot(action, action_values)

        q_eval = self.q_eval.predict(state, verbose=0)
        q_next = self.q_eval.predict(new_state, verbose=0)

        # Start from the current predictions so that only the taken action's
        # output contributes to the loss.
        q_target = q_eval.copy()
        batch_index = np.arange(self.batch_size, dtype=np.int32)

        # Bellman target; `done` holds 1 for non-terminal transitions and 0
        # for terminal ones, which removes the bootstrap term at episode end.
        q_target[batch_index, action_indices] = \
            reward + self.gamma*np.max(q_next, axis=1)*done

        _ = self.q_eval.fit(state, q_target, verbose=0)

        # Clamp so epsilon never drops below the configured floor (the
        # previous form could undershoot it for one step).
        self.epsilon = max(self.epsilon*self.epsilon_dec, self.epsilon_min)

    def save_model(self):
        """Save the Q-network to ``self.model_file``."""
        self.q_eval.save(self.model_file)

    def load_model(self):
        """Replace the Q-network with the model stored in ``self.model_file``."""
        self.q_eval = load_model(self.model_file)


In [17]:
class Cell:
    """One square of the board: its hidden content, position and open/flag state."""

    def __init__(self, content, x, y):
        """Create a closed, unflagged cell holding ``content`` at position (x, y)."""
        self.x, self.y = x, y
        self.content = content
        self.open = False
        self.flagged = False

    def reveal(self):
        """Open the cell; an already open or flagged cell only prints a notice."""
        if self.open:
            print(f"cell ({self.x},{self.y}) is already open")
        elif self.flagged:
            print(f"cell ({self.x},{self.y}) is already flagged")
        else:
            self.open = True

    def flag(self):
        """Flag the cell; an already flagged or open cell only prints a notice."""
        if self.flagged:
            print(f"cell ({self.x},{self.y}) is already flagged")
        elif self.open:
            print(f"cell ({self.x},{self.y}) cannot be flagged because it is already open")
        else:
            self.flagged = True

    def getContent(self):
        """Return the content of an open cell, or -2 while the cell is closed."""
        return self.content if self.open == True else -2


In [46]:
import random

class MinesweeperMap:
    """Minesweeper board: a grid of ``Cell`` objects plus neighbourhood queries.

    ``cell.x`` is the row index and ``cell.y`` the column index, so
    ``self.map[cell.x][cell.y]`` is the cell itself. A content of -1 marks a
    bomb; any other content is the number of bombs among the cell's neighbours.
    "Unexplored" means neither opened nor flagged; "explored" means opened or
    flagged.
    """

    def __init__(self, difficulty): #map generation
        """Generate a random board.

        difficulty: "hard" (16x30, 85 bombs) or "medium" (16x16, 40 bombs),
            case-insensitive; any other value gives the easy 9x9 board with
            10 bombs.
        """
        difficulty = difficulty.lower()
        if difficulty == "hard":
            rows, cols, self.numberOfBombs = 16, 30, 85
        elif difficulty == "medium":
            rows, cols, self.numberOfBombs = 16, 16, 40
        else: # easy level
            rows, cols, self.numberOfBombs = 9, 9, 10
        self.map = [[Cell(0, r, c) for c in range(cols)] for r in range(rows)]

        # (0,0) is the cell SolverCSP opens first, so it and the two cells
        # beside it must never hold a bomb. Excluding them by coordinate
        # avoids the index shift of consecutive list pops, which used to
        # remove (0,2) and (1,2) instead of (0,1) and (1,0).
        safe = {(0, 0), (0, 1), (1, 0)}
        cords = [(r, c) for r in range(rows) for c in range(cols)
                 if (r, c) not in safe]

        # setting up the locations of the bomb cells randomly
        for r, c in random.sample(cords, self.numberOfBombs):
            self.map[r][c] = Cell(-1, r, c)

        # every non-bomb cell stores the number of bombs around it
        for r in range(rows):
            for c in range(cols):
                if self.map[r][c].content != -1:
                    self.map[r][c] = Cell(self.numberOfSurroundingBombs(r, c), r, c)

    #only to generate the map (not used in SolverCSP)
    def numberOfSurroundingBombs(self, x, y):
        """Count the bombs among the neighbours of the cell at row x, column y."""
        return sum(1 for cell in self.getNeighbors(self.map[x][y])
                   if cell.content == -1)

    # returns the surrounding neighboring cells of a cell.
    # a cell has 3, 5 or 8 neighbors, depending on its location on the map.
    def getNeighbors(self, c):
        """Return the in-bounds neighbours of ``c``.

        The order is fixed (down, up, left, right, then the four diagonals);
        the solver's stack order depends on it.
        """
        rows, cols = len(self.map), len(self.map[0])
        offsets = ((1, 0), (-1, 0), (0, -1), (0, 1),
                   (1, 1), (-1, -1), (1, -1), (-1, 1))
        neighbors = []
        for dx, dy in offsets:
            nx, ny = c.x + dx, c.y + dy
            if 0 <= nx < rows and 0 <= ny < cols:
                neighbors.append(self.map[nx][ny])
        return neighbors

    #prints the current state of the map
    def drawMap(self):
        """Print the board: B for an opened bomb, X for a flag, blank for an unexplored cell."""
        separator = "----" * len(self.map[0]) + "---"
        for row in self.map:
            print(separator)
            for cell in row:
                if cell.open:
                    if cell.content == -1:
                        print(" | B", end="")
                    else:
                        print(" | " + str(cell.content), end="")
                elif cell.flagged:
                    print(" | X", end="")
                else:
                    print(" |  ", end="")
            print(" | ")
        print(separator)


    # the functions below are made for the SolverCSP

    # return a list of the surrounding flags of a cell
    def surroundingFlags(self, c):
        """Return the flagged neighbours of ``c``."""
        return [cell for cell in self.getNeighbors(c) if cell.flagged]

    def numberOfSurroundingFlags(self, c):
        """Return how many neighbours of ``c`` are flagged."""
        return len(self.surroundingFlags(c))

    # returns a list of the surrounding unexplored cells of a cell
    def surroundingUnexplored(self, c):
        """Return the neighbours of ``c`` that are neither flagged nor open."""
        return [cell for cell in self.getNeighbors(c)
                if not cell.flagged and not cell.open]

    def numberOfSurroundingUnexplored(self, c):
        """Return how many neighbours of ``c`` are unexplored."""
        return len(self.surroundingUnexplored(c))

    # return the number of flagged cells in the whole map
    def getNumberOfFlags(self):
        """Count the flagged cells on the whole board."""
        return sum(1 for row in self.map for cell in row if cell.flagged)

    # returns the number of explored cells in the whole map
    def getNumberOfExplored(self):
        """Count the cells that are flagged or open."""
        return sum(1 for row in self.map for cell in row
                   if cell.flagged or cell.open)

    #returns a list of all unexplored cells in the whole map
    def getUnexploredCells(self):
        """Return every cell that is neither flagged nor open, in row-major order."""
        return [cell for row in self.map for cell in row
                if not cell.flagged and not cell.open]

    #returns whether a cell has any unexplored neighbors
    def hasUnexploredNeighbor(self, c):
        """Return True when at least one neighbour of ``c`` is unexplored."""
        return any(not cell.flagged and not cell.open
                   for cell in self.getNeighbors(c))

    #returns whether a cell has any opened neighbors
    def hasExploredNeighbor(self, c):
        """Return True when at least one neighbour of ``c`` is open and not flagged."""
        return any(cell.open and not cell.flagged
                   for cell in self.getNeighbors(c))

    #return true if a cell is open and has unexplored neighbors
    def openAndHasUnexploredNeighbors(self, c):
        """Return True when ``c`` is open and still has an unexplored neighbour."""
        return bool(c.open and self.hasUnexploredNeighbor(c))

    #returns the open neighbors of a cell that still have at least one unexplored neighboring cell
    def openNeighborsWithUnexploredNeighbors(self, c):
        """Return the open, unflagged neighbours of ``c`` that themselves have an unexplored neighbour."""
        return [cell for cell in self.getNeighbors(c)
                if cell.open and not cell.flagged and self.hasUnexploredNeighbor(cell)]

    # returns true if a cell has an open neighbor that has at least one unexplored neighbor.
    def hasOpenNeighborsWithUnexploredNeighbors(self, c):
        """Return True when ``openNeighborsWithUnexploredNeighbors(c)`` would be non-empty."""
        return any(cell.open and not cell.flagged and self.hasUnexploredNeighbor(cell)
                   for cell in self.getNeighbors(c))

    # for each flagged neighbor of a cell, collects that neighbor's open neighbors that still have an unexplored cell.
    def flaggedNeighborsWithUnexploredNeighbors(self, c):
        """Collect, over the flagged neighbours of ``c``, their open neighbours that still have an unexplored neighbour.

        A cell can appear more than once when it borders several flagged neighbours.
        """
        wantedNeighbors = []
        for cell in self.getNeighbors(c):
            if cell.flagged and not cell.open:
                wantedNeighbors.extend(self.openNeighborsWithUnexploredNeighbors(cell))
        return wantedNeighbors

    # returns a random not opened and not flagged cell from the map
    def getRandomCell(self):
        """Return a random unexplored cell, or -1 when none is left."""
        undiscovered = self.getUnexploredCells()
        if undiscovered:
            return random.choice(undiscovered)
        return -1

    #checks whether a cell has at least 3 explored neighbors
    def hasAtLeastThreeExploredNeighbors(self, c):
        """Return True when at least three neighbours of ``c`` are flagged or open."""
        count = 0
        for cell in self.getNeighbors(c):
            if cell.flagged or cell.open:
                count += 1
                if count == 3:
                    return True
        return False

    #returns a board of 5x5 for RL
    def returnsFiveBoard(self):
        """Return a 5x5 window of cells (list of 5 rows) for the RL agent.

        The window is centred on the first unexplored cell, in row-major
        order, that has at least three explored neighbours and lies at least
        two cells away from every edge. Returns an empty list when no such
        cell exists.
        """
        rows, cols = len(self.map), len(self.map[0])
        # The 2-cell margin keeps the window inside the board; on the easy
        # 9x9 board this scans rows and columns 2..6.
        for i in range(2, rows - 2):
            for j in range(2, cols - 2):
                centre = self.map[i][j]
                if (not centre.open and not centre.flagged
                        and self.hasAtLeastThreeExploredNeighbors(centre)):
                    return [[self.map[m][n] for n in range(j - 2, j + 3)]
                            for m in range(i - 2, i + 3)]
        return []

    

 **Use the following cell to generate the training data**

In [47]:
#for RL

import copy

class SolverCSP:
    """Rule-based Minesweeper solver organised in three escalating levels.

    All functions are called through the class (``SolverCSP.firstLevel(...)``)
    and take the board as their first argument; none of them uses an instance.
    The shared ``stack`` holds cells whose neighbourhood should be re-examined.
    """

    # Class-level list shared by all calls: receives the 5x5 window returned by
    # map.returnsFiveBoard() whenever solveMinesweeper runs out of deductions.
    fiveBoards = []
        
    def solveMinesweeper(map: MinesweeperMap):
        """Run the solver on ``map`` starting from cell (0,0).

        Loops until the number of flags equals the number of bombs: the stack
        is drained with firstLevel, then secondLevel is tried, then thirdLevel
        if secondLevel produced nothing. When no level makes progress, a 5x5
        window from ``map.returnsFiveBoard()`` is appended to
        ``SolverCSP.fiveBoards`` (if non-empty) and the function returns.

        Every return path returns True.
        """
        stack = []
        map.map[0][0].reveal()
        #map.drawMap()
        stack.append(map.map[0][0])
       
        while (map.numberOfBombs != map.getNumberOfFlags()):
            # Level 1: keep applying the single-cell rules until the stack is empty.
            while len(stack)!=0:
                stack = SolverCSP.firstLevel(map, stack)
                
            # Level 2: pairwise deductions; a non-empty stack sends us back to level 1.
            stack = SolverCSP.secondLevel(map, stack)
            if len(stack)==0:
                # Level 3: hypothesis testing on a copy of the board.
                stack = SolverCSP.thirdLevel(map, stack)
                if len(stack)==0:
                    if map.numberOfBombs == map.getNumberOfFlags():
                        return True
                    else:
                      # The number of unexplored cells equals the number of bombs not yet flagged.
                      # NOTE(review): by that count these cells would all be bombs, yet they are
                      # revealed rather than flagged - confirm this is intended.
                      if ((len(map.map) * len(map.map[0])) - map.getNumberOfExplored()) == map.numberOfBombs - map.getNumberOfFlags():
                        arr = map.getUnexploredCells()
                        for i in range(len(arr)):
                            map.map[arr[i].x][arr[i].y].reveal()
                        return True
                      # Stuck: record a 5x5 window as a training sample and stop.
                      xxxx = map.returnsFiveBoard()
                      if(len(xxxx)!=0):
                          SolverCSP.fiveBoards.append(xxxx)
                      return True
# Disabled fallback kept for reference: open a random cell when stuck.
#                       c = map.getRandomCell()
#                       print("RANDOMLY")
#                       if c.content == -1:
#                           map.map[c.x][c.y].reveal()
#                           map.drawMap()
#                           return False
#                       else:
#                           map.map[c.x][c.y].reveal()
#                           stack.append(map.map[c.x][c.y])
#                           map.drawMap()
#                           openNeighborsWithUnexploredNeighbors = map.openNeighborsWithUnexploredNeighbors(map.map[c.x][c.y])
#                           for i in range(len(openNeighborsWithUnexploredNeighbors)):
#                               stack.append(map.map[openNeighborsWithUnexploredNeighbors[i].x][openNeighborsWithUnexploredNeighbors[i].y])
        
        return True
    

    def firstLevel(map: MinesweeperMap, stack):
      """Pop one cell and apply the two single-cell rules to it.

      Rule 1: if the cell's content equals its unexplored plus flagged
      neighbours, all unexplored neighbours are flagged.
      Rule 2: if the cell's content equals its flagged neighbours, all
      unexplored neighbours are revealed and pushed.
      In both cases open cells that still border unexplored cells are pushed
      for re-examination. Returns the stack.
      """
      top = stack.pop()
      if top.content == map.numberOfSurroundingUnexplored(top) + map.numberOfSurroundingFlags(top):
        unexploredNeighbors = map.surroundingUnexplored(top)
        for neighbor in unexploredNeighbors:
            map.map[neighbor.x][neighbor.y].flag()
            #map.drawMap()
        flaggedNeighborsWithUnexploredNeighbors = map.flaggedNeighborsWithUnexploredNeighbors(top)
        for flaggedNeighbor in flaggedNeighborsWithUnexploredNeighbors:
            stack.append(map.map[flaggedNeighbor.x][flaggedNeighbor.y])
      # Evaluated after rule 1, so it sees any flags rule 1 just placed.
      if top.content == map.numberOfSurroundingFlags(top):
        unexploredNeighbors = map.surroundingUnexplored(top)
        for neighbor in unexploredNeighbors:
            map.map[neighbor.x][neighbor.y].reveal()
            #map.drawMap()
            stack.append(map.map[neighbor.x][neighbor.y])
        openNeighborsWithUnexploredNeighbors = map.openNeighborsWithUnexploredNeighbors(top)
        for openNeighbor in openNeighborsWithUnexploredNeighbors:
            stack.append(map.map[openNeighbor.x][openNeighbor.y])
      return stack


    def secondLevel(map: MinesweeperMap, stack):
      """Compare each open frontier cell with its open frontier neighbours.

      For a pair (cell, neighbour) the unexplored neighbours they share are
      removed from both lists, leaving ``first`` (only next to the cell) and
      ``second`` (only next to the neighbour). Depending on which list is
      empty and on the difference between the two cells' remaining bomb counts
      (content minus surrounding flags), the leftover cells are revealed or
      flagged. The function returns as soon as one deduction has been applied;
      if none applies the stack is returned unchanged.
      """
      for i in range(len(map.map)):
        for j in range(len(map.map[0])):
            if map.openAndHasUnexploredNeighbors(map.map[i][j]) and map.hasOpenNeighborsWithUnexploredNeighbors(map.map[i][j]):
                neighb = map.openNeighborsWithUnexploredNeighbors(map.map[i][j])
                for k in range(len(neighb)):
                    first = map.surroundingUnexplored(map.map[i][j])
                    firstContent = map.map[i][j].content
                    second = map.surroundingUnexplored(neighb[k])
                    secondContent = neighb[k].content
                    # Remove every cell present in both lists; after a removal the
                    # index l is re-examined because the list shifted left.
                    l = 0
                    while l < len(first):
                        q = 0
                        while q < len(second):
                            if first[l].x== second[q].x and first[l].y == second[q].y:
                                first.pop(l)
                                second.pop(q)
                                l = l-1
                                break
                            q = q+1
                        l =l+1

                    # Case A: every unexplored neighbour of the cell is shared; `second` holds the extras.
                    if len(first)==0 and len(second)!=0:
                        res = secondContent - firstContent - map.numberOfSurroundingFlags(neighb[k]) + map.numberOfSurroundingFlags(map.map[i][j])
                        # Same remaining bomb count on both sides: the extras are revealed.
                        if res == 0:
                            for p in range(len(second)):
                                map.map[second[p].x][second[p].y].reveal()
                                #map.drawMap()
                                stack.append(map.map[second[p].x][second[p].y])
                                openNeighborsWithUnexploredNeighbors = map.openNeighborsWithUnexploredNeighbors(second[p])
                                for h in range(len(openNeighborsWithUnexploredNeighbors)):
                                    stack.append(map.map[openNeighborsWithUnexploredNeighbors[h].x][openNeighborsWithUnexploredNeighbors[h].y])
                            return stack
                        # The difference in remaining bombs equals the number of extras: they are flagged.
                        if (len(second) + map.numberOfSurroundingFlags(neighb[k]) + firstContent - map.numberOfSurroundingFlags(map.map[i][j])) == secondContent:
                            for p in range(len(second)):
                                map.map[second[p].x][second[p].y].flag()
                                #map.drawMap()
                                stack.append(map.map[second[p].x][second[p].y])
                                flaggedNeighborsWithUnexploredNeighbors = map.flaggedNeighborsWithUnexploredNeighbors(second[p])
                                for h in range(len(flaggedNeighborsWithUnexploredNeighbors)):
                                    stack.append(map.map[flaggedNeighborsWithUnexploredNeighbors[h].x][flaggedNeighborsWithUnexploredNeighbors[h].y])
                            return stack
                    # Case B: mirror of case A with the roles of the two cells swapped.
                    if len(second)==0 and len(first)!=0:
                        res = firstContent - secondContent - map.numberOfSurroundingFlags(map.map[i][j]) + map.numberOfSurroundingFlags(neighb[k])
                        if res == 0:
                            for p in range(len(first)):
                                map.map[first[p].x][first[p].y].reveal()
                                #map.drawMap()
                                stack.append(map.map[first[p].x][first[p].y])
                                openNeighborsWithUnexploredNeighbors = map.openNeighborsWithUnexploredNeighbors(first[p])
                                for h in range(len(openNeighborsWithUnexploredNeighbors)):
                                    stack.append(map.map[openNeighborsWithUnexploredNeighbors[h].x][openNeighborsWithUnexploredNeighbors[h].y])
                            return stack
                        if (len(first) + map.numberOfSurroundingFlags(map.map[i][j]) + secondContent - map.numberOfSurroundingFlags(neighb[k])) == firstContent:
                            for p in range(len(first)):
                                map.map[first[p].x][first[p].y].flag()
                                #map.drawMap()
                                stack.append(map.map[first[p].x][first[p].y])
                                flaggedNeighborsWithUnexploredNeighbors = map.flaggedNeighborsWithUnexploredNeighbors(first[p])
                                for h in range(len(flaggedNeighborsWithUnexploredNeighbors)):
                                    stack.append(map.map[flaggedNeighborsWithUnexploredNeighbors[h].x][flaggedNeighborsWithUnexploredNeighbors[h].y])
                            return stack

                    # Case C: exactly one non-shared cell on each side; a difference of one in
                    # remaining bombs flags the cell on the larger side and reveals the other.
                    if len(second)==1 and len(first)==1:
                        res = secondContent - firstContent - map.numberOfSurroundingFlags(neighb[k]) + map.numberOfSurroundingFlags(map.map[i][j])
                        if res==-1:
                          map.map[second[0].x][second[0].y].reveal()
                          #map.drawMap()
                          map.map[first[0].x][first[0].y].flag()
                          #map.drawMap()
                          stack.append(map.map[second[0].x][second[0].y])
                          stack.append(map.map[first[0].x][first[0].y])
                          openNeighborsWithUnexploredNeighbors = map.openNeighborsWithUnexploredNeighbors(second[0])
                          for h in range(len(openNeighborsWithUnexploredNeighbors)):
                              stack.append(map.map[openNeighborsWithUnexploredNeighbors[h].x][openNeighborsWithUnexploredNeighbors[h].y])
                          flaggedNeighborsWithUnexploredNeighbors = map.flaggedNeighborsWithUnexploredNeighbors(first[0])
                          for t in range(len(flaggedNeighborsWithUnexploredNeighbors)):
                              stack.append(map.map[flaggedNeighborsWithUnexploredNeighbors[t].x][flaggedNeighborsWithUnexploredNeighbors[t].y])
                          return stack
                        if res==1:
                          map.map[second[0].x][second[0].y].flag()
                          #map.drawMap()
                          map.map[first[0].x][first[0].y].reveal()
                          #map.drawMap()
                          stack.append(map.map[second[0].x][second[0].y])
                          stack.append(map.map[first[0].x][first[0].y])
                          openNeighborsWithUnexploredNeighbors = map.openNeighborsWithUnexploredNeighbors(first[0])
                          for h in range(len(openNeighborsWithUnexploredNeighbors)):
                              stack.append(map.map[openNeighborsWithUnexploredNeighbors[h].x][openNeighborsWithUnexploredNeighbors[h].y])
                          flaggedNeighborsWithUnexploredNeighbors = map.flaggedNeighborsWithUnexploredNeighbors(second[0])
                          for t in range(len(flaggedNeighborsWithUnexploredNeighbors)):
                              stack.append(map.map[flaggedNeighborsWithUnexploredNeighbors[t].x][flaggedNeighborsWithUnexploredNeighbors[t].y])
                          return stack
      return stack


    def thirdLevel(map: MinesweeperMap, stack):
       """Look for a safe cell by contradiction on a deep copy of the board.

       Each unexplored cell with an open neighbour is flagged on the copy, the
       consequences are propagated with firstLevelMod, and checkMistakes is run.
       If a contradiction is found, the cell is revealed on the real board, it
       and its open frontier neighbours are pushed, and the stack is returned.
       If no cell yields a contradiction the stack is returned unchanged.
       """
       map1 = copy.deepcopy(map)
       stack1 = []
       for i in range(len(map1.map)):
        for j in range(len(map1.map[0])):
            cell = map1.map[i][j]
            if not cell.open and not cell.flagged and map1.hasExploredNeighbor(cell):
              # Hypothesis: this cell is a bomb.
              cell.flag()
              flaggedNeighborsWithUnexploredNeighbors = map1.flaggedNeighborsWithUnexploredNeighbors(cell)
              for h in range(len(flaggedNeighborsWithUnexploredNeighbors)):
                stack1.append(map1.map[flaggedNeighborsWithUnexploredNeighbors[h].x][flaggedNeighborsWithUnexploredNeighbors[h].y])
              while len(stack1)!=0:
                stack1 = SolverCSP.firstLevelMod(map1, stack1)
              res = SolverCSP.checkMistakes(map1)
              if(res):
                #print("USED 3")
                #print("i=" + str(i) + " j=" + str(j))
                cell.flagged=False
                map.map[i][j].reveal()
                #map.drawMap()
                stack.append(map.map[i][j])
                openNeighborsWithUnexploredNeighbors = map.openNeighborsWithUnexploredNeighbors(map.map[i][j])
                for h in range(len(openNeighborsWithUnexploredNeighbors)):
                  stack.append(map.map[openNeighborsWithUnexploredNeighbors[h].x][openNeighborsWithUnexploredNeighbors[h].y])
                return stack
              else:
                # NOTE(review): only the hypothesised flag is reset; flags and reveals made on the
                # copy by firstLevelMod stay in place for the next candidate - confirm intended.
                cell.flagged=False
       return stack


    def firstLevelMod(map: MinesweeperMap, stack):
      """Variant of firstLevel used by thirdLevel on the copied board.

      Applies the same two rules, but cells revealed by rule 2 are not pushed
      onto the stack; they are recorded in ``noList`` and skipped when the open
      frontier neighbours are pushed. Returns the stack.
      """
      noList = []
      top = stack.pop()
      if top.content == map.numberOfSurroundingUnexplored(top) + map.numberOfSurroundingFlags(top):
        unexploredNeighbors = map.surroundingUnexplored(top)
        for neighbor in unexploredNeighbors:
            map.map[neighbor.x][neighbor.y].flag()
            #map.drawMap()
        flaggedNeighborsWithUnexploredNeighbors = map.flaggedNeighborsWithUnexploredNeighbors(top)
        for flaggedNeighbor in flaggedNeighborsWithUnexploredNeighbors:
            stack.append(map.map[flaggedNeighbor.x][flaggedNeighbor.y])
      if top.content == map.numberOfSurroundingFlags(top):
        unexploredNeighbors = map.surroundingUnexplored(top)
        for neighbor in unexploredNeighbors:
            map.map[neighbor.x][neighbor.y].reveal()
            noList.append([neighbor.x,neighbor.y])
            #map.drawMap()
        openNeighborsWithUnexploredNeighbors = map.openNeighborsWithUnexploredNeighbors(top)
        for openNeighbor in openNeighborsWithUnexploredNeighbors:
            # Skip the cells that were revealed just above.
            enter= True
            for nl in noList:
              if (nl[0]==openNeighbor.x and nl[1]==openNeighbor.y):
                enter=False
                break
            if(enter):    
              stack.append(map.map[openNeighbor.x][openNeighbor.y])
      return stack

    def checkMistakes(map: MinesweeperMap):
      """Return True when some open non-bomb cell is inconsistent with its neighbourhood.

      A cell is inconsistent when its content exceeds its unexplored plus
      flagged neighbours, or is smaller than its flagged neighbours. Returns
      False when no such cell exists.
      """
      for i in range(len(map.map)):
        for j in range(len(map.map[0])):
          if(map.map[i][j].open and map.map[i][j].content!=-1):
            # Not enough room left for the required bombs.
            if map.map[i][j].content > map.numberOfSurroundingUnexplored(map.map[i][j]) + map.numberOfSurroundingFlags(map.map[i][j]):
              return True
            # More flags than the cell's number allows.
            if map.map[i][j].content < map.numberOfSurroundingFlags(map.map[i][j]):
              return True
      return False

# Collect 7000 training samples: keep solving fresh easy boards and store the
# 5x5 window the solver leaves in SolverCSP.fiveBoards whenever it gets stuck.
data = []
countIn = 0
while countIn < 7000:
    map1 = MinesweeperMap("easy")
    solved = SolverCSP.solveMinesweeper(map1)
    if not SolverCSP.fiveBoards:
        # This board produced no window; try another one.
        continue
    # Move the oldest recorded window from the solver into the data set.
    data.append(SolverCSP.fiveBoards.pop(0))
    countIn += 1
        

In [48]:
# This is used for training the RL model
# it defines the reward system
def step(action,state):
    """Score one decision about the centre cell of a 5x5 board.

    action: 0 means "flag the centre cell", anything else means "open it".
    state: 5x5 grid (list of rows) of cells; the centre cell is state[2][2].

    The reward is +1 for flagging a bomb or opening a non-bomb, otherwise -1.
    The centre cell is left open after an "open" action and closed after a
    "flag" action.

    Returns (state_, reward), where state_ is the flattened array of
    getContent() values of every cell after the action.
    """
    target = state[2][2]

    # Open the cell temporarily so that getContent() exposes its real value.
    target.open = True
    holds_bomb = target.getContent() == -1

    if action == 0:
        # Flagging does not uncover the cell, so close it again.
        target.open = False
        reward = 1 if holds_bomb else -1
    else:
        reward = -1 if holds_bomb else 1

    width = len(state[0])
    visible = [[row[c].getContent() for c in range(width)] for row in state]
    state_ = np.array(visible).flatten()
    return state_, reward

In [None]:
import matplotlib.pyplot as plt

def plotterr1(ii, avg, title):
    """Add the point (ii, avg) to the global x1/y1 series and show them as a scatter plot.

    ii: x coordinate of the new point.
    avg: y coordinate of the new point.
    title: title of the figure.
    """
    # The series live in module-level lists so they grow across calls.
    x1.append(ii)
    y1.append(avg)

    plt.scatter(x1, y1)
    plt.title(title)
    plt.ylabel('Y axis')
    plt.xlabel('X axis')
    plt.show()
    
def plotterr2(ii, avg, title):
    """Add the point (ii, avg) to the global x2/y2 series and show them as a scatter plot.

    ii: x coordinate of the new point.
    avg: y coordinate of the new point.
    title: title of the figure.
    """
    # The series live in module-level lists so they grow across calls.
    x2.append(ii)
    y2.append(avg)

    plt.scatter(x2, y2)
    plt.title(title)
    plt.ylabel('Y axis')
    plt.xlabel('X axis')
    plt.show()

# Train the agent on the collected 5x5 boards: each board is one single-step
# episode in which the agent decides whether to flag or open the centre cell.
lr = 0.0005 
agent = Agent(gamma=0.0, epsilon=1, alpha=lr, input_dims=25, n_actions=2, mem_size=100000, batch_size=64, epsilon_end=0.01)
agent.save_model()
agent.load_model()
scores = []
eps_history = []
# Point series appended to by plotterr1 / plotterr2.
x1=[]
y1=[]
x2=[]
y2=[]
# Must exist before the loop: it is incremented on every positive reward and
# was previously never initialised (NameError on the first win).
number_of_wins = 0
for i in range(len(data)):
    # NOTE(review): every episode is a single step, yet `done` is stored as
    # False; with gamma=0.0 the bootstrap term is zero either way.
    done = False
    score = 0
    rewpos = []
    obs = data[i]
    # What the agent sees: contents of the open cells, -2 for closed ones.
    observation = np.array([[obs[r][c].getContent() for c in range(len(obs[0]))] for r in range(len(obs))])
    observation = observation.flatten()
    action = agent.choose_action(observation)
    observation_, reward = step(action,obs)
    if reward ==1:
      number_of_wins= number_of_wins+1
    #print(reward," reward")
    print(agent.epsilon," eps")
    score += reward
    if(reward>0):
        rewpos.append(reward)
    agent.remember(observation, action, reward, observation_, int(done))
    observation = observation_
    agent.learn()
        
    eps_history.append(agent.epsilon)
    scores.append(score)
    sum_positive = sum([num for num in rewpos if num > 0])
    
    plotterr1(i,score, "Sum of Rewards vs Epochs")

    
    # Checkpoint the model every 10 episodes.
    if i % 10 == 0 and i > 0:
        agent.save_model()
        
        print("save erav")