1. Given a state, get the values for its neighbouring states [up, down, left, right]
2. Change the Q-matrix rows of those neighbouring states

In [1]:
import numpy as np
import random

from sklearn.externals import joblib

In [2]:
# Load the saved Q-matrix. The code further down reads it as one row per grid
# cell (row-major order) with four action columns in the order [U, D, L, R].
# NOTE(review): joblib.load unpickles the file, so only open files from a trusted source.
# NOTE(review): `sklearn.externals.joblib` was removed in scikit-learn 0.23; on newer
# installs the import cell has to use `import joblib` instead.
q_matrix_load = joblib.load('qMatrix_7by7_no_walls.pkl')

In [4]:
# Special cells as (row, col). makeMoveMatrix and createTrail draw them as '+' and '-';
# breadcrumb stops walking once it reaches posState.
posState = (6,6)
negState = (5,5)

In [39]:
# Grid dimensions; col_max / row_max are the largest valid column / row indices.
num_rows = 7
num_cols = 7
col_max = num_cols - 1
row_max = num_rows - 1

def makeMoveMatrix(q_matrix, rows=None, cols=None, pos_state=None, neg_state=None):
    '''
    Turn a Q-matrix into a grid of greedy actions.

    - 'q_matrix' is read as one row per grid cell, in row-major order, with the
      four columns holding the values of the actions [up, down, left, right]
    - every cell of the result holds the abbreviation ('U', 'D', 'L' or 'R') of
      its highest-valued action
    - ties between equal Q-values are broken at random with np.random.choice,
      so repeated calls on the same matrix can return different grids
    - 'rows' / 'cols' give the grid size; when omitted they fall back to the
      notebook globals num_rows / num_cols
    - 'pos_state' / 'neg_state' are the (row, col) cells overwritten with '+'
      and '-'; when omitted they fall back to the globals posState / negState
    - returns a numpy array of single-character strings, shape (rows, cols)
    '''
    rows = num_rows if rows is None else rows
    cols = num_cols if cols is None else cols
    pos_state = posState if pos_state is None else pos_state
    neg_state = negState if neg_state is None else neg_state

    actionAbbr = ['U','D','L','R']
    q_values = np.asarray(q_matrix)
    movementGrid = []

    for i in range(rows):
        row = []
        for j in range(cols):
            # Row-major lookup: each grid row spans `cols` Q-matrix rows.
            # (Striding by the number of rows is only right on square grids.)
            singleState = q_values[i*cols + j]
            qMax = np.max(singleState)
            indices = np.argwhere(singleState == qMax).flatten()
            # Random pick among all actions that share the maximum value.
            row.append(actionAbbr[np.random.choice(indices)])
        movementGrid.append(row)
    moveMatrix = np.array(movementGrid)

    # Mark the two special cells on top of whatever action was chosen there.
    moveMatrix[pos_state] = '+'
    moveMatrix[neg_state] = '-'

    return moveMatrix

In [40]:
# Greedy policy implied by the loaded Q-matrix. makeMoveMatrix breaks ties between
# equal Q-values at random, so the grid can differ from one run to the next.
moveMatrix = makeMoveMatrix(q_matrix_load)
moveMatrix

array([['R', 'D', 'R', 'R', 'D', 'R', 'D'],
       ['D', 'R', 'D', 'D', 'D', 'D', 'D'],
       ['R', 'R', 'R', 'D', 'R', 'R', 'D'],
       ['D', 'D', 'R', 'D', 'D', 'D', 'D'],
       ['D', 'D', 'D', 'R', 'D', 'R', 'D'],
       ['R', 'R', 'R', 'R', 'D', '-', 'D'],
       ['R', 'R', 'R', 'R', 'R', 'R', '+']], 
      dtype='<U1')

In [41]:
# Smallest Q-value in the loaded matrix, repeated once per action (4 entries).
# NOTE(review): `police` is not referenced by any function visible in this section;
# update_qMatrix takes the minimum of its own copy of the matrix instead.
qMinValue = np.min(q_matrix_load)
police = [qMinValue]*4

In [42]:
# Display the list built from the minimum Q-value.
police

[-97, -97, -97, -97]

In [44]:
# Largest valid row / column index, read as globals by get_qMatrix_rowNum and
# neighbourStates.
# NOTE(review): these literals repeat the values already derived from num_rows /
# num_cols in the earlier cell; if the grid size changes, both places must change.
row_max = 6
col_max = 6

def actionsAvailable(state, row_max, col_max):
    '''
    - Reports which of the four moves keep 'state' inside the grid
    - The result is ordered [up, down, left, right]: 0 means the move stays on
      the grid, -1 means it would leave it
    - 'row_max' / 'col_max' are the largest valid row / column indices
    - example:
      actionsAvailable((0,0), 6, 6) ==> [-1, 0, -1, 0]
    '''
    r, c = state
    stays_on_grid = (
        r > 0,          # up
        r < row_max,    # down
        c > 0,          # left
        c < col_max,    # right
    )
    return [0 if ok else -1 for ok in stays_on_grid]

def get_qMatrix_rowNum(state):
    '''
    - Converts a (row, col) grid state into its row number in the Q-matrix
    - States are numbered row by row, with (col_max + 1) states per grid row;
      col_max is read from the notebook globals
    '''
    grid_row, grid_col = state[0], state[1]
    states_per_row = col_max + 1
    return int(grid_row * states_per_row + grid_col)

def neighbourStates(state):
    '''
    - Lists the cells adjacent to 'state', ordered [above, below, left, right]
    - A slot holds -1 when that neighbour would fall outside the grid
      (grid limits come from the globals row_max / col_max)
    - example:
      neighbourStates((0,0)) ==> [-1, (1, 0), -1, (0, 1)]
    '''
    on_grid = actionsAvailable(state, row_max, col_max)
    r, c = state[0], state[1]
    candidates = [(r-1, c), (r+1, c), (r, c-1), (r, c+1)]
    return [cell if flag != -1 else -1 for flag, cell in zip(on_grid, candidates)]

def update_qMatrix(state, q_matrix):
    '''
    - Returns a copy of 'q_matrix' in which stepping into 'state' is discouraged
    - For every on-grid neighbour of 'state', the action that leads from that
      neighbour back into 'state' gets the smallest value found in the copy
    - 'q_matrix' itself is left untouched
    '''
    penalised = np.array(q_matrix, copy=True)
    # neighbourStates is ordered [above, below, left, right]; the move that
    # returns to 'state' is the opposite one: D, U, R, L -> columns 1, 0, 3, 2.
    move_back = (1, 0, 3, 2)
    for slot, neighbour in enumerate(neighbourStates(state)):
        if neighbour == -1:
            # no neighbour on this side of the grid
            continue
        penalised[get_qMatrix_rowNum(neighbour), move_back[slot]] = np.min(penalised)

    return penalised

In [45]:
# Policy grid before and after penalising moves into (0,2).
# update_qMatrix only touches the rows of the cells next to (0,2); differences
# elsewhere between the two printed grids come from makeMoveMatrix breaking
# ties at random on each call.
print(makeMoveMatrix(q_matrix_load))
print('\n')
new_q_matrix = update_qMatrix((0,2), q_matrix_load)
print(makeMoveMatrix(new_q_matrix))

[['D' 'D' 'R' 'D' 'R' 'R' 'D']
 ['R' 'D' 'R' 'D' 'D' 'R' 'D']
 ['D' 'R' 'R' 'D' 'R' 'D' 'D']
 ['R' 'R' 'R' 'D' 'D' 'D' 'D']
 ['D' 'D' 'R' 'R' 'D' 'R' 'D']
 ['R' 'R' 'R' 'D' 'D' '-' 'D']
 ['R' 'R' 'R' 'R' 'R' 'R' '+']]


[['R' 'D' 'D' 'D' 'D' 'R' 'D']
 ['R' 'R' 'D' 'R' 'R' 'R' 'D']
 ['R' 'D' 'D' 'D' 'D' 'R' 'D']
 ['D' 'R' 'D' 'D' 'R' 'D' 'D']
 ['R' 'R' 'D' 'R' 'R' 'R' 'D']
 ['R' 'R' 'D' 'D' 'D' '-' 'D']
 ['R' 'R' 'R' 'R' 'R' 'R' '+']]


### Now the interesting part: finding a trail to the destination no matter where the police is

In [64]:
# build the blank grid that the breadcrumb trail is drawn on
def createTrail(rows=None, cols=None, pos_state=None, neg_state=None):
    '''
    - Builds a grid of single-space strings with the two special cells marked
    - 'rows' / 'cols' give the grid size; when omitted they fall back to the
      notebook globals num_rows / num_cols (the row count is no longer a
      hard-coded 7)
    - 'pos_state' / 'neg_state' are the (row, col) cells set to '+' and '-';
      when omitted they fall back to the globals posState / negState
    - returns a numpy array of single-character strings, shape (rows, cols)
    '''
    rows = num_rows if rows is None else rows
    cols = num_cols if cols is None else cols
    pos_state = posState if pos_state is None else pos_state
    neg_state = negState if neg_state is None else neg_state

    trail = np.full((rows, cols), ' ', dtype='<U1')
    trail[pos_state] = '+'
    trail[neg_state] = '-'
    return trail

def breadcrumb(policeState, max_steps=None):
    '''
    - Walks the greedy policy from (0,0) towards posState while the police
      stands on 'policeState', and records the cells visited
    - The Q-matrix used is q_matrix_load with moves into 'policeState'
      penalised by update_qMatrix
    - The policy grid is rebuilt on every step, so ties between equal Q-values
      are re-drawn at random each time
    - The walk stops at posState; it also stops when the current cell has no
      move (the '-' cell), instead of looping there forever
    - 'max_steps' caps the number of moves; the default is
      4 * num_rows * num_cols
    - returns (trail, breadCrumb):
        trail      - grid with '@' start, '#' police, '*' visited cells and
                     the '+' / '-' special cells
        breadCrumb - list of visited (row, col) cells, start included
    - raises RuntimeError when the cap is hit before the walk ends
    - raises IndexError when the policy steps outside the grid
    '''
    if max_steps is None:
        max_steps = 4 * num_rows * num_cols

    moves = {'U': (-1, 0), 'D': (1, 0), 'L': (0, -1), 'R': (0, 1)}

    state = (0,0)
    breadCrumb = [state]

    trail = createTrail()
    trail[state] = '@'
    trail[policeState] = '#'

    new_q_matrix = update_qMatrix(policeState, q_matrix_load)

    while state != posState:
        # len(breadCrumb) - 1 is the number of moves made so far.
        if len(breadCrumb) - 1 >= max_steps:
            raise RuntimeError(
                'breadcrumb: no terminal state reached within %d moves' % max_steps)

        direction = str(makeMoveMatrix(new_q_matrix)[state])
        if direction not in moves:
            # The cell carries a marker ('-') rather than a move: the walk has
            # reached negState and cannot continue.
            break

        d_row, d_col = moves[direction]
        state = (state[0] + d_row, state[1] + d_col)
        # Negative indices would silently wrap around in numpy, so check bounds.
        if not (0 <= state[0] < num_rows and 0 <= state[1] < num_cols):
            raise IndexError('breadcrumb: policy stepped off the grid at %r' % (state,))

        breadCrumb.append(state)

    # Start and final cell keep their own markers; everything in between is trail.
    for element in breadCrumb[1:-1]:
        trail[element] = '*'

    return trail, breadCrumb

In [65]:
# Trail and visited cells with the police at row 6, column 0.
breadcrumb((6,0))

(array([['@', ' ', ' ', ' ', ' ', ' ', ' '],
        ['*', '*', ' ', ' ', ' ', ' ', ' '],
        [' ', '*', '*', ' ', ' ', ' ', ' '],
        [' ', ' ', '*', '*', '*', ' ', ' '],
        [' ', ' ', ' ', ' ', '*', '*', '*'],
        [' ', ' ', ' ', ' ', ' ', '-', '*'],
        ['#', ' ', ' ', ' ', ' ', ' ', '+']], 
       dtype='<U1'),
 [(0, 0),
  (1, 0),
  (1, 1),
  (2, 1),
  (2, 2),
  (3, 2),
  (3, 3),
  (3, 4),
  (4, 4),
  (4, 5),
  (4, 6),
  (5, 6),
  (6, 6)])

In [38]:
# Police placed in column 6 on rows 0-5; row 6 is left out, presumably because
# (6,6) is posState.
# NOTE(review): this cell's execution count (38) is lower than that of the cell
# defining breadcrumb (64), and the output shown holds bare grids, whereas
# breadcrumb as defined returns a (trail, breadCrumb) tuple - the output looks
# stale; re-run to confirm.
for i in range(6):
    print(breadcrumb((i,6)))
    print('\n')

[['@' ' ' ' ' ' ' ' ' ' ' '#']
 ['*' ' ' ' ' ' ' ' ' ' ' ' ']
 ['*' '*' ' ' ' ' ' ' ' ' ' ']
 [' ' '*' '*' ' ' ' ' ' ' ' ']
 [' ' ' ' '*' '*' ' ' ' ' ' ']
 [' ' ' ' ' ' '*' ' ' '-' ' ']
 [' ' ' ' ' ' '*' '*' '*' '+']]


[['@' '*' ' ' ' ' ' ' ' ' ' ']
 [' ' '*' '*' '*' ' ' ' ' '#']
 [' ' ' ' ' ' '*' '*' '*' '*']
 [' ' ' ' ' ' ' ' ' ' ' ' '*']
 [' ' ' ' ' ' ' ' ' ' ' ' '*']
 [' ' ' ' ' ' ' ' ' ' '-' '*']
 [' ' ' ' ' ' ' ' ' ' ' ' '+']]


[['@' ' ' ' ' ' ' ' ' ' ' ' ']
 ['*' ' ' ' ' ' ' ' ' ' ' ' ']
 ['*' ' ' ' ' ' ' ' ' ' ' '#']
 ['*' ' ' ' ' ' ' ' ' ' ' ' ']
 ['*' ' ' ' ' ' ' ' ' ' ' ' ']
 ['*' '*' ' ' ' ' ' ' '-' ' ']
 [' ' '*' '*' '*' '*' '*' '+']]


[['@' ' ' ' ' ' ' ' ' ' ' ' ']
 ['*' '*' ' ' ' ' ' ' ' ' ' ']
 [' ' '*' '*' '*' '*' '*' ' ']
 [' ' ' ' ' ' ' ' ' ' '*' '#']
 [' ' ' ' ' ' ' ' ' ' '*' '*']
 [' ' ' ' ' ' ' ' ' ' '-' '*']
 [' ' ' ' ' ' ' ' ' ' ' ' '+']]


[['@' ' ' ' ' ' ' ' ' ' ' ' ']
 ['*' ' ' ' ' ' ' ' ' ' ' ' ']
 ['*' ' ' ' ' ' ' ' ' ' ' ' ']
 ['*' '*' '*' '*' '*' ' ' '