In [1]:
import numpy as np
import random
from math import floor

In [2]:

MIN_SIZE = 2
MAX_SIZE = 10
DEFAULT_SIZE = 4


In [3]:
#Function to assert the builded grid is solvable
def is_solvable(grid):
    flat_grid = grid.flatten() 
    
    inversions = 0
    for i in range(len(flat_grid)):
        for j in range(i + 1, len(flat_grid)):
            if flat_grid[j] and flat_grid[i] and flat_grid[i] > flat_grid[j]:
                inversions += 1
    return inversions % 2 == 0

def build_line(start_symb, stop_symb, sep_symb, line):
    return '\n%s%s%s\n' % (start_symb, sep_symb.join(line), stop_symb)

In [4]:
# State : Correspond a la grid
# Actions: Dé deplacer :
#0: dé en haut
#1: dé a gauche
#2: dé en bas
#3: dé à droite

class Grid :
    def __init__(self,size=DEFAULT_SIZE) -> None:
        assert size>=MIN_SIZE and size <= MAX_SIZE
        self.size=size

        while True:
            grid=list(range(size**2))
            random.shuffle(grid)
            grid=np.asarray(grid).reshape(size, size)
            if is_solvable(grid):break
        self.state=grid


    def take_action(self,action):
      i, j = np.where(self.state == 0)
      if len(i) > 0 and len(j) > 0:
        i = i[0]
        j = j[0]
        if action == 0:
            if i > 0:
                self.state[i][j], self.state[i-1][j] = self.state[i-1][j], self.state[i][j]
        elif action == 2:
            if i < self.size-1:
                self.state[i][j], self.state[i+1][j] = self.state[i+1][j], self.state[i][j]
        elif action ==1:
            if j > 0:
                self.state[i][j], self.state[i][j-1] = self.state[i][j-1], self.state[i][j]
        elif action == 3:
            if j < self.size-1:
                self.state[i][j], self.state[i][j+1] = self.state[i][j+1], self.state[i][j]

    def is_finish(self):
        ended_grid=[i for i in range(1,self.size**2)]
        ended_grid.append(0)
        return np.array_equal(
            self.state,
            np.asarray(ended_grid).reshape(self.size,self.size)
            )
    
    def get_possible_actions(self)-> list[int]:
        """return the possible actions"""
        actions=[i for i in range(0,4)]
        pos_empty=self.get_empty_position()
        if pos_empty[0]==0 : actions.remove(0)
        elif pos_empty[0]==(self.size-1):actions.remove(2)
        if pos_empty[1]==0:actions.remove(1)
        elif pos_empty[1]==(self.size-1):actions.remove(3)
        return  actions
    
    def get_empty_position(self)->tuple:
        """Return the position od the tuple"""
        return floor(np.argmin(self.state)/self.size),np.argmin(self.state)%self.size

    def get_good_place(self):
      ended_grid=[i for i in range(1,self.size**2)]
      ended_grid.append(0)
      return (np.asarray(ended_grid).reshape(self.size,self.size) == self.state ).sum()

    def __str__(self) -> str:
        """Renderer"""
        tile_line = np.full(self.size, '─' * 4).tolist()
        horizontal_line = build_line('├', '┤', '┼', tile_line)
        first_horizontal_line = build_line('┌', '┐', '┬', tile_line)
        last_horizontal_line = build_line('└', '┘', '┴', tile_line)
        grid_to_show = first_horizontal_line
        for count, row in enumerate(self.state):
            grid_to_show += '│'
            for tile in row:
                if tile == 0:
                    tile = '  '
                grid_to_show += ' %s │' % '{0:>2}'.format(tile)
            if not count == self.size - 1:
                grid_to_show += horizontal_line
        grid_to_show += last_horizontal_line
        grid_to_show+=f"\n Possible actions: {self.get_possible_actions()}"
        return grid_to_show
        
    

In [5]:
grid=Grid(size=3)
print(grid)
grid.is_finish()


┌────┬────┬────┐
│  3 │  2 │  5 │
├────┼────┼────┤
│  1 │    │  4 │
├────┼────┼────┤
│  7 │  6 │  8 │
└────┴────┴────┘

 Possible actions: [0, 1, 2, 3]


False

In [6]:
class Game:
    def __init__(self) -> None:
        self.grid=Grid()
        self.end=False
        self.round=0
        self.reward=0
        self.sum_reward=0
        self.actions=[]# list of all actions done in the game
        self.number_good_place=0

    def play_round(self):
        """function_agent : fonction de choix de l'agent"""
        reward=-1
        action=np.random.randint(0,3)
        self.grid.take_action(action)
        if self.grid.is_finish():
          self.end = True
          reward += 50

        if self.grid.get_good_place() == self.number_good_place:
          reward -= self.last_good # more we dont increase the number of well positioned stuff more reward we loose
          self.last_good += 1
        elif self.grid.get_good_place() < self.number_good_place:
          reward -= 5
        else:
          reward+=self.grid.get_good_place()
        return reward,self.grid.state,action


    def play_game(self):
        while not self.end or self.round < 30:
            reward,state,action=self.play_round()
            self.reward=reward
            self.sum_reward+=reward
            self.actions.append(action)
            self.round+=1
            print(self.round)
            print(grid)


    def __str__(self):
        game_to_show=f"---- {self.round} ----------\n"
        game_to_show+=self.grid.__str__()
        game_to_show+=f"\nReward Round{self.reward} \nReward cum:{self.sum_reward}"
        return game_to_show




In [7]:
gg=Game()

In [8]:
gg.play_game()

1

┌────┬────┬────┐
│  3 │  2 │  5 │
├────┼────┼────┤
│  1 │    │  4 │
├────┼────┼────┤
│  7 │  6 │  8 │
└────┴────┴────┘

 Possible actions: [0, 1, 2, 3]
2

┌────┬────┬────┐
│  3 │  2 │  5 │
├────┼────┼────┤
│  1 │    │  4 │
├────┼────┼────┤
│  7 │  6 │  8 │
└────┴────┴────┘

 Possible actions: [0, 1, 2, 3]
3

┌────┬────┬────┐
│  3 │  2 │  5 │
├────┼────┼────┤
│  1 │    │  4 │
├────┼────┼────┤
│  7 │  6 │  8 │
└────┴────┴────┘

 Possible actions: [0, 1, 2, 3]
4

┌────┬────┬────┐
│  3 │  2 │  5 │
├────┼────┼────┤
│  1 │    │  4 │
├────┼────┼────┤
│  7 │  6 │  8 │
└────┴────┴────┘

 Possible actions: [0, 1, 2, 3]
5

┌────┬────┬────┐
│  3 │  2 │  5 │
├────┼────┼────┤
│  1 │    │  4 │
├────┼────┼────┤
│  7 │  6 │  8 │
└────┴────┴────┘

 Possible actions: [0, 1, 2, 3]
6

┌────┬────┬────┐
│  3 │  2 │  5 │
├────┼────┼────┤
│  1 │    │  4 │
├────┼────┼────┤
│  7 │  6 │  8 │
└────┴────┴────┘

 Possible actions: [0, 1, 2, 3]
7

┌────┬────┬────┐
│  3 │  2 │  5 │
├────┼────┼────┤
│  1 │    │  4 │

KeyboardInterrupt: 