In [17]:
import numpy as np
from termcolor import colored
import math

### 1. Game implementation

In [18]:
# Seed NumPy's global random generator so the randomly generated grids are reproducible.
np.random.seed(seed=10)

### Reference for the Random seed: https://www.w3schools.com/python/ref_random_seed.asp

In [19]:
def make_grid(x, y, n):
    """Create a random integer grid.

    Args:
        x: Number of rows (first axis of the returned array).
        y: Number of columns (second axis of the returned array).
        n: Exclusive upper bound for the cell values; each cell is drawn
            from the integers 0 .. n-1.

    Returns:
        A NumPy array of shape (x, y) drawn from NumPy's global random
        generator, so the result depends on the current `np.random` seed.
    """
    shape = (x, y)
    return np.random.randint(low=0, high=n, size=shape)

In [20]:
# Scoring modes that Game accepts for its `scoring` argument.
scorings = ['relative', 'difference']

# Every (row offset, column offset) pair with components in {-1, 0, 1}:
# the eight neighbouring moves plus the stay-in-place move (0, 0).
directions = [
    (row_step, col_step)
    for row_step in (-1, 0, 1)
    for col_step in (-1, 0, 1)
]

class Game:
    """Grid game in which an agent walks from the top-left to the bottom-right cell.

    The board is a random integer grid with `x` rows and `y` columns. The agent
    starts at (0, 0) and moves one step at a time using one of the offsets in
    the module-level `directions` list. The game ends when the agent enters
    the bottom-right cell (height - 1, width - 1).

    Attributes:
        height: Number of rows in the grid (the `x` constructor argument).
        width: Number of columns in the grid (the `y` constructor argument).
        game_state: Dict with keys
            'grid'  - the (x, y) array of cell values,
            'score' - the accumulated score,
            'seen'  - an (x, y) array holding 1 for every cell the agent has
                      occupied (including the start cell) and 0 elsewhere.
        agent_state: Dict with keys
            'pos'     - the agent's current (row, column) position,
            'history' - the set of positions the agent has moved away from.
        is_end: True once the agent has entered the bottom-right cell.
        scoring: The scoring mode, one of the module-level `scorings`.
    """

    def __init__(self, x, y, n, scoring, seed=10):
        """Create a new game on a freshly generated random grid.

        Args:
            x: Number of grid rows.
            y: Number of grid columns.
            n: Exclusive upper bound of the random cell values.
            scoring: 'relative' or 'difference' (must be in `scorings`).
            seed: Value used to re-seed NumPy's *global* random generator
                before the grid is generated, so equal seeds give equal grids.

        Raises:
            AssertionError: If `scoring` is not one of `scorings`.
        """
        assert scoring in scorings
        np.random.seed(seed)
        self.width = y
        self.height = x
        self.game_state = {'grid': make_grid(x,y,n), 'score': 0, 'seen': np.zeros((x,y))}
        self.is_end = False
        self.agent_state = {'pos': (0,0), 'history':set()}
        self.scoring = scoring
        # The agent already occupies the start cell, so it counts as visited;
        # otherwise get_neighbours() would offer it again after the first move.
        if x > 0 and y > 0:
            self.game_state['seen'][0, 0] = 1

    def _in_bounds(self, row, col):
        """Return True when (row, col) is a valid index into the grid."""
        return 0 <= row < self.height and 0 <= col < self.width

    def step(self, velocity):
        """Move the agent by `velocity` and update the score.

        Nothing happens if the game has already ended or if the move would
        leave the grid. Otherwise the cell being left is added to the history,
        the new cell is marked as seen, and the score grows by

        * 'relative'   - the value of the cell the agent is leaving;
        * 'difference' - the absolute difference between the value of the cell
          being left and the value of the cell being entered.

        Args:
            velocity: A (row offset, column offset) tuple from `directions`.

        Raises:
            AssertionError: If `velocity` is not in `directions` (only checked
                while the game is still running).
        """
        if self.is_end:
            return
        # make sure direction is valid
        assert velocity in directions
        grid = self.game_state['grid']
        x, y = self.agent_state['pos']
        pos_t = (x + velocity[0], y + velocity[1])
        # make sure new position is inside grid
        if not self._in_bounds(pos_t[0], pos_t[1]):
            return

        # update position
        self.agent_state['history'].add((x,y))
        self.agent_state['pos'] = pos_t
        self.game_state['seen'][pos_t] = 1

        # update score
        if self.scoring == 'relative':
            self.game_state['score'] += grid[x, y]
        elif self.scoring == 'difference':
            # Built-in abs(): the math module has no `abs` function.
            self.game_state['score'] += abs(grid[x, y] - grid[pos_t[0], pos_t[1]])

        # The end cell is the last valid index, i.e. (height - 1, width - 1).
        if pos_t == (self.height - 1, self.width - 1):
            self.is_end = True

    def get_neighbours(self):
        """Return the unvisited cells adjacent to the agent.

        Returns:
            A dict mapping each (row, column) position that is inside the
            grid, one step (including diagonals) away from the agent and not
            yet marked in `seen`, to the grid value at that position.
        """
        grid = self.game_state['grid']
        visited = self.game_state['seen']
        x, y = self.agent_state['pos']
        neighbours = dict()
        for d_row, d_col in directions:
            if (d_row, d_col) == (0, 0):
                # Staying in place is not a neighbour.
                continue
            row, col = x + d_row, y + d_col
            # Bounds are checked first so `visited` is never indexed out of range.
            if self._in_bounds(row, col) and visited[row, col] == 0:
                neighbours[(row, col)] = grid[row, col]
        return neighbours

    def show_grid(self):
        """Print the grid row by row with every value coloured red."""
        print('GAME GRID')
        grid = self.game_state['grid']
        for i in range(grid.shape[0]):
            print('| ', end='')
            for j in range(grid.shape[1]):
                print(colored(grid[i, j],'red'), end=' | ')
            print('\n')

    def show_path(self):
        """Print the score and the grid with the agent's path highlighted.

        Cells in the agent's history are printed green, the current position
        is printed red followed by a blue '*', and all other cells are red.
        A cell that is in the history takes the green style even if the agent
        is currently standing on it again.
        """
        score = self.game_state['score']
        print(f'GAME GRID WITH PATH\nSCORE: {score}')
        grid = self.game_state['grid']
        path = self.agent_state['history']
        for i in range(grid.shape[0]):
            print('| ', end='')
            for j in range(grid.shape[1]):
                if ((i, j) in path):
                    print(colored(grid[i, j], 'green'), end=' | ')
                elif (i, j) == self.agent_state['pos']:
                    print(''+colored(grid[i,j], 'red')+colored('*', 'blue'),end='| ')
                else:
                    print(colored(grid[i, j], 'red'), end=' | ')
            print('\n')
            
    

In [27]:
# Build a 3x3 board with 'relative' scoring, then print the plain grid
# followed by the same grid annotated with the agent's position and path.
game = Game(x=3, y=3, n=10, scoring='relative')
game.show_grid()
game.show_path()

GAME GRID
| [31m9[0m | [31m4[0m | [31m0[0m | 

| [31m1[0m | [31m9[0m | [31m0[0m | 

| [31m1[0m | [31m8[0m | [31m9[0m | 

GAME GRID WITH PATH
SCORE: 0
| [31m9[0m[34m*[0m| [31m4[0m | [31m0[0m | 

| [31m1[0m | [31m9[0m | [31m0[0m | 

| [31m1[0m | [31m8[0m | [31m9[0m | 



In [22]:
# Move one column to the right (row offset 0, column offset +1) and redraw the board.
game.step(velocity=(0, 1))
game.show_path()

GAME GRID WITH PATH
SCORE: 9
| [32m9[0m | [31m4[0m[34m*[0m| [31m0[0m | 

| [31m1[0m | [31m9[0m | [31m0[0m | 

| [31m1[0m | [31m8[0m | [31m9[0m | 



In [23]:
# Move one more column to the right and redraw the board.
game.step(velocity=(0, 1))
game.show_path()

GAME GRID WITH PATH
SCORE: 13
| [32m9[0m | [32m4[0m | [31m0[0m[34m*[0m| 

| [31m1[0m | [31m9[0m | [31m0[0m | 

| [31m1[0m | [31m8[0m | [31m9[0m | 



### 2. Heuristic algorithm

We will implement a greedy strategy that attempts to maximise the agent's short-term reward by always moving first to the cells in the direction of the endpoint that have the smallest path values.

In [24]:
# Start over on a fresh 3x3 board; the default seed is reused, so the grid is the same.
game = Game(x=3, y=3, n=10, scoring='relative')
game.show_path()
# Last expression of the cell: the unvisited cells next to the agent and their values.
game.get_neighbours()

GAME GRID WITH PATH
SCORE: 0
| [31m9[0m[34m*[0m| [31m4[0m | [31m0[0m | 

| [31m1[0m | [31m9[0m | [31m0[0m | 

| [31m1[0m | [31m8[0m | [31m9[0m | 



{(0, 1): 4, (1, 0): 1, (1, 1): 9}

In [25]:
# Scaffold for the greedy search: the loop currently breaks unconditionally
# after drawing the board, so at most one iteration runs and no move is made.
while not game.is_end:
    # Candidate moves: unvisited in-bounds cells adjacent to the agent.
    neighbours = game.get_neighbours()
    # TODO: first sort moves based on actual distance to end
    game.show_path()
    break

GAME GRID WITH PATH
SCORE: 0
| [31m9[0m[34m*[0m| [31m4[0m | [31m0[0m | 

| [31m1[0m | [31m9[0m | [31m0[0m | 

| [31m1[0m | [31m8[0m | [31m9[0m | 



### 3. Dijkstra's algorithm


For each cell in the grid, we need to work out every possible path to it, and the score of each such path. We start with the first cell, then work out the next cells it can reach, and recursively keep applying this until we have data for all cells. Once this is complete, we start at the end cell and select the next cell in the grid that has the shortest total path, then repeat until we have a path back to the origin.