In [26]:
import gymnasium as gym
import gridworld
import numpy as np

def map_to_state(grid_position, grid_size)-> int:
    return grid_position[0] * grid_size + grid_position[1]   
    
def print_matrix_with_arrows(matrix, g_position=None):

    def get_arrow_direction(neighbors):
        directions = ['↑', '↓', '←', '→']
        max_value = max(neighbors, default=float('-inf'))
        if neighbors[0] == max_value:  # Up
            return directions[0]
        elif neighbors[1] == max_value:  # Down
            return directions[1]
        elif neighbors[2] == max_value:  # Left
            return directions[2]
        elif neighbors[3] == max_value:  # Right
            return directions[3]
        return ' '  # No arrow if no neighbors

    rows = len(matrix)
    cols = len(matrix[0])
    
    output = [[' ' for _ in range(cols)] for _ in range(rows)]

    for r in range(rows):
        for c in range(cols):
            if g_position and [r, c] == g_position:
                output[r][c] = 'G'
                continue

            neighbors = []

            # Up
            if r > 0:
                neighbors.append(matrix[r-1][c])
            else:
                neighbors.append(float('-inf'))  # Default to negative infinity if no neighbor

            # Down
            if r < rows - 1:
                neighbors.append(matrix[r+1][c])
            else:
                neighbors.append(float('-inf'))  # Default to negative infinity if no neighbor

            # Left
            if c > 0:
                neighbors.append(matrix[r][c-1])
            else:
                neighbors.append(float('-inf'))  # Default to negative infinity if no neighbor

            # Right
            if c < cols - 1:
                neighbors.append(matrix[r][c+1])
            else:
                neighbors.append(float('-inf'))  # Default to negative infinity if no neighbor

            arrow = get_arrow_direction(neighbors)
            output[r][c] = arrow

    for row in output:
        print(' '.join(row))


In [30]:
grid_size = 4

env = gym.make("GridWorld-v0", random_start=True, random_goal=False, size=grid_size)

# Array di tutti gli stati
states = np.array([], dtype=int)

# Trasformo tutte le posizioni della griglia in stati
for i in range(grid_size):
    for j in range(grid_size):
        states = np.append(states, map_to_state([i,j], grid_size))
N_states = len(states)

# Init Value function e Elegibility
V = np.zeros(N_states)
E = np.zeros(N_states)

n_episodes = 1000  # Numero episodi totali
alpha = 0.05           # LR
gamma = 1             # Undiscounted
lambda_ = 0.9         # E decay

for episode in range(n_episodes):

    # Init environment ad ogni episodio
    observation, info = env.reset()
    done = False
    curr_cell = observation['agent']
    curr_state = map_to_state(curr_cell, grid_size)
    E = np.zeros(N_states)
    while not done:
        # Eseguo azione casuale e raccolgo osservazioni e reward
        action = env.action_space.sample()
        observation, reward, terminated, truncated, info = env.step(action)
        next_cell = observation['agent']
        next_state = map_to_state(next_cell, grid_size)
        
        # Decadimento E per tutti gli stati
        E *= lambda_ * gamma
        
        # Se non mi trovo nello stato finale aggiorno E per lo stato corrente
        if not terminated:
            E[curr_state] += 1
            
        # Calcolo TD error
        delta = reward + gamma*V[next_state] - V[curr_state]

        # Update di V per tutti gli stati
        V += alpha*delta*E   
                
        curr_state = next_state
        done = terminated or truncated

# Costruisco la matrice finale della Value functio e la visualizzo
env.close()
V_matrix = V.reshape((grid_size, grid_size))
print(V_matrix) 
print_matrix_with_arrows(V_matrix, [grid_size-1, grid_size-1])

[[-61.82996601 -60.44275057 -53.20091247 -52.19156683]
 [-57.63731956 -56.23746825 -50.23089968 -40.87264722]
 [-58.68388552 -50.84941157 -40.22866425 -31.70419882]
 [-59.57926077 -51.04634866 -44.76833656   0.        ]]
↓ → ↓ ↓
→ → ↓ ↓
→ → → ↓
→ → → G
