In [46]:
import numpy as np

# Hare model: gen_prob_matrix uses hare_stay when the hare's position is
# unchanged, and hare_move * hare_choice for each changed position.
hare_stay = 0.6
hare_move = 0.4
hare_choice = 0.5

# Wolf model: wolf_success is passed as action_success to gen_prob_matrix for
# the moving actions. wolf_fail and wolf_stay are not referenced by the code
# visible in this cell.
wolf_fail = 0.2
wolf_success = 0.8
wolf_stay = 1


#WOLF ACTIONS
def up(position):
    """Return the position reached by the `up` action.

    Positions <= 2 map to position + 2 and all others to position - 2, so on
    cells 1..4 this swaps 1 <-> 3 and 2 <-> 4.
    """
    return position + 2 if position <= 2 else position - 2
    
def down(position):
    """Return the position reached by the `down` action.

    Positions > 2 map to position - 2 and all others to position + 2, so on
    cells 1..4 this swaps 3 <-> 1 and 4 <-> 2.
    """
    if position > 2:
        return position - 2
    return position + 2

def right(position):
    """Return the position reached by the `right` action.

    Positions 1 and 3 map to position + 1; every other position maps to
    position - 1, so on cells 1..4 this swaps 1 <-> 2 and 3 <-> 4.
    """
    if position in (1, 3):
        return position + 1
    return position - 1
    
def left(position):
    """Return the position reached by the `left` action.

    Positions 2 and 4 map to position - 1; every other position maps to
    position + 1, so on cells 1..4 this swaps 2 <-> 1 and 4 <-> 3.
    """
    return position - 1 if position in (2, 4) else position + 1
    
def stay(position):
    """Return `position` unchanged (the no-move action)."""
    return position

#HARE MOVEMENTS
def hare_movements(position, actions):
    """Return the distinct positions reachable from `position`.

    Each callable in `actions` is applied to `position`; duplicate results
    are collapsed. The result is a list built from a set, so callers should
    not rely on its ordering.
    """
    reachable = {move(position) for move in actions}
    return list(reachable)



###########################################################################################################

# Ordered list of actions; gen_cost_matrix addresses its columns by these
# indices (0=up, 1=down, 2=left, 3=right, 4=stay).
action_space = [up, down, left, right, stay]

# Every (wolf position, hare position) pair over cells 1..4, wolf-major.
state_space = [(wolf, hare) for wolf in range(1, 5) for hare in range(1, 5)]

# Maps each state to its row/column index in the transition and cost matrices.
m_dictionary = {state: index for index, state in enumerate(state_space)}

###########################################################################################################
    

def gen_prob_matrix(states, action, action_success):
    """Build the state-transition matrix for one wolf action.

    Args:
        states: iterable of (wolf position, hare position) tuples; each must
            be a key of the module-level ``m_dictionary``.
        action: callable mapping the wolf's position to the position the
            action aims for.
        action_success: probability that the wolf ends up at the position
            returned by ``action``; with probability ``1 - action_success``
            it remains where it was.

    Returns:
        A ``(len(states), len(states))`` float array ``P`` where ``P[i, j]``
        is the probability of going from the state indexed ``i`` to the state
        indexed ``j`` (indices taken from ``m_dictionary``).

    The hare's candidate positions are obtained by applying every action in
    the module-level ``action_space`` to its current position. It keeps its
    position with probability ``hare_stay`` and reaches each other candidate
    with probability ``hare_move * hare_choice``.
    """
    P = np.zeros(shape=(len(states), len(states)), dtype="float")
    for initial_state in states:
        wolf_current, hare_current = initial_state
        wolf_next_pos = action(wolf_current)
        row = m_dictionary[initial_state]

        if wolf_next_pos == wolf_current:
            # The action does not move the wolf (e.g. `stay`): success and
            # failure are the same outcome, so it is certain. Previously this
            # case only summed to 1 when the caller passed action_success=0.
            wolf_outcomes = [(wolf_current, 1)]
        else:
            wolf_outcomes = [
                (wolf_current, 1 - action_success),
                (wolf_next_pos, action_success),
            ]

        for hare_pos in hare_movements(hare_current, action_space):
            # Pick the hare's probability for this candidate position.
            if hare_pos != hare_current:
                hare_prob = hare_move * hare_choice
            else:
                hare_prob = hare_stay

            for wolf_pos, wolf_prob in wolf_outcomes:
                column = m_dictionary[(wolf_pos, hare_pos)]
                P[row, column] = wolf_prob * hare_prob

    return P
            



def gen_cost_matrix(states, actions):
    """Build the (state, action) immediate-cost matrix.

    Every entry starts at 1. Depending on which group of (wolf, hare) pairs a
    state belongs to, selected action columns are overwritten. Column indices
    are hard-coded and follow the order of the module-level ``action_space``
    (0=up, 1=down, 2=left, 3=right, 4=stay). Rows are indexed through the
    module-level ``m_dictionary``.

    Args:
        states: iterable of (wolf position, hare position) tuples.
        actions: sequence of actions; only its length is used.

    Returns:
        A ``(len(states), len(actions))`` float array of costs.
    """
    n_actions = len(actions)
    costs = np.ones(shape=(len(states), n_actions))

    # (member states, columns to overwrite, cost written to those columns)
    rules = (
        # wolf and hare on the same cell: column 4 costs nothing
        (((1, 1), (2, 2), (3, 3), (4, 4)), (4,), 0),
        # positions differ by 2: columns 0 and 1 cost nothing
        (((1, 3), (3, 1), (2, 4), (4, 2)), (0, 1), 0),
        # pairs {1,2} / {3,4}: columns 2 and 3 cost nothing
        (((1, 2), (2, 1), (3, 4), (4, 3)), (2, 3), 0),
        # remaining pairs: every column except the last costs 0.5
        (((1, 4), (4, 1), (2, 3), (3, 2)), range(n_actions - 1), 0.5),
    )

    for state in states:
        for members, columns, cost in rules:
            if state in members:
                for column in columns:
                    costs[m_dictionary[state]][column] = cost
                break

    return costs


def compute_cost_to_go(probability_matrix, cost_vector, discount):
    """Return the discounted cost-to-go J = (I - discount * P)^-1 c.

    Args:
        probability_matrix: square NumPy array P of transition probabilities.
        cost_vector: immediate costs c, with leading dimension matching P.
        discount: scalar discount factor applied to P.

    Returns:
        The array J satisfying (I - discount * P) J = c.

    Raises:
        numpy.linalg.LinAlgError: if (I - discount * P) is singular.
    """
    identity = np.identity(probability_matrix.shape[0])
    system = np.subtract(identity, np.multiply(discount, probability_matrix))
    # Solve the linear system directly instead of forming the explicit
    # inverse and multiplying: same result, better accuracy, less work.
    return np.linalg.solve(system, cost_vector)




# One transition matrix per wolf action.
Pup = gen_prob_matrix(state_space, up, wolf_success)
Pdown = gen_prob_matrix(state_space, down, wolf_success)
Pleft = gen_prob_matrix(state_space, left, wolf_success)
Pright = gen_prob_matrix(state_space, right, wolf_success)
# `stay` never moves the wolf; action_success=0 makes the "did not move"
# outcome have probability 1 - 0 = 1.
Pstay = gen_prob_matrix(state_space, stay, 0)

# Built the same way as Pstay; not used by the code visible in this cell.
A = gen_prob_matrix(state_space, stay, 0)
C = gen_cost_matrix(state_space, action_space)


np.set_printoptions(linewidth=250, precision=2)
# Cost-to-go of always choosing `up`: pair Pup with the cost column that
# belongs to `up` (looked up by position in action_space rather than a
# hard-coded index that pointed at `down`'s column).
print(compute_cost_to_go(Pup, C[:, action_space.index(up)], 0.99), "\n")


print()


[ 62.58  62.97  61.8   62.64  62.97  62.58  62.64  61.8   61.8   62.64  62.58  62.97  62.64  61.8   62.97  62.58] 

