In [1]:
# IPython magic: render matplotlib figures inline in the notebook output.
%matplotlib inline

In [7]:
import numpy as np

import gymnasium as gym
from gymnasium import spaces
import tensorflow as tf


  "Gymnasium minimally supports python 3.6 as the python foundation not longer supports the version, please update your version to 3.7+"


In [8]:
class NNGridWorldEnv(gym.Env):
    """Grid-world environment whose transitions and rewards are predicted by Keras models.

    The maze is a 2-D grid of single-character cells: 'S' marks the start,
    'G' the goal and '#' a wall. The start/goal lookup assumes the maze
    contains exactly one 'S' and exactly one 'G'.

    Observations are the agent's ``(row, col)`` position as an ``int32`` array.
    There are four discrete actions; what each action means is decided entirely
    by the loaded transition model (presumably the four grid directions --
    TODO confirm against the model's training data).
    """

    def __init__(self, maze, grid_model_path, reward_model_path):
        """Build the environment and load both neural-network models.

        Args:
            maze: 2-D nested sequence of single-character strings.
            grid_model_path: Path to the saved Keras model that predicts the
                next position from ``[row, col, action]``.
            reward_model_path: Path to the saved Keras model that predicts the
                reward from ``[row, col, action]``.
        """
        self.maze = np.array(maze)
        self.start_pos = (np.concatenate(np.where(self.maze == 'S'))).astype(np.int32)  # Starting position
        self.goal_pos = (np.concatenate(np.where(self.maze == 'G'))).astype(np.int32)  # Goal position
        self.num_rows, self.num_cols = self.maze.shape

        # Box bounds are inclusive, so the largest valid index is size - 1.
        self.observation_space = spaces.Box(
            low=np.array([0, 0]),
            high=np.array([self.num_rows - 1, self.num_cols - 1]),
            dtype=np.int32,
        )
        self.action_space = spaces.Discrete(4)

        # Define the episode state up front so step() cannot hit a missing
        # attribute if it is called before reset().
        self._agent_location = self.start_pos.copy()
        self._target_location = self.goal_pos.copy()

        # Load models
        print("Loading models...")
        self.grid_model = tf.keras.models.load_model(grid_model_path, compile=False)
        self.reward_model = tf.keras.models.load_model(reward_model_path, compile=False)
        print("Models loaded")

    def _is_valid_position(self, pos):
        """Return True if ``pos`` lies inside the grid and is not a wall.

        Args:
            pos: ``(row, col)`` pair of integers.
        """
        row, col = pos
        # Bounds must be checked BEFORE indexing the maze: an out-of-range index
        # raises IndexError and a negative one silently wraps around the grid.
        if row < 0 or col < 0 or row >= self.num_rows or col >= self.num_cols:
            return False
        return bool(self.maze[row, col] != '#')

    def reset(self, seed=None, options=None):
        """Put the agent back on the start cell.

        Args:
            seed: Optional seed forwarded to ``gym.Env.reset``.
            options: Unused.

        Returns:
            Tuple ``(observation, info)`` with the start position and an empty dict.
        """
        # Let the base class seed self.np_random as the Gymnasium API expects.
        super().reset(seed=seed)

        # Copy so the stored start/goal arrays can never be aliased by callers.
        self._agent_location = self.start_pos.copy()
        self._target_location = self.goal_pos.copy()

        return self._agent_location, {}

    def step(self, action):
        """Advance one step using the models' predictions.

        The transition model proposes the next position; it is only applied if
        it is a valid (in-grid, non-wall) cell, otherwise the agent stays put.
        The reward is taken from the reward model regardless of whether the
        move was applied.

        Args:
            action: Element of ``self.action_space``.

        Returns:
            Tuple ``(observation, reward, terminated, truncated, info)``;
            ``truncated`` is always False and ``info`` is always empty.
        """
        # Single-row batch [[row, col, action]] fed to both models.
        model_input = np.column_stack(np.array([self._agent_location[0], self._agent_location[1], action]))

        # Round the continuous predictions to grid indices / an integer reward.
        # The position is cast to int32 to match observation_space's dtype.
        new_pos = np.round(self.grid_model.predict(model_input, verbose=0)[0]).astype(np.int32)
        # Take the scalar explicitly: int() on a 1-element array is deprecated in NumPy.
        reward_pred = np.ravel(self.reward_model.predict(model_input, verbose=0))[0]
        reward = int(np.round(reward_pred))

        # Check if the new position is valid
        if self._is_valid_position(new_pos):
            self._agent_location = new_pos

        # An episode is done if the agent has reached the target
        terminated = np.array_equal(self._agent_location, self._target_location)

        return self._agent_location, reward, terminated, False, {}

    def render(self):
        """Print the static maze layout, one text line per grid row."""
        for row in self.maze:
            print("".join(row))

    def close(self):
        """Nothing to release."""
        pass

In [9]:
# Maze layout: 'S' = start, 'G' = goal, '#' = wall, '.' = free cell.
maze = [
    ['.', '.', '#', '.', 'G'],
    ['.', '.', '#', '.', '.'],
    ['.', '.', '.', '.', '.'],
    ['.', '.', '#', '.', '.'],
    ['S', '.', '#', '.', '.'],
]

# Saved Keras models for the transition dynamics and the reward.
grid_model_path = '../data/models/modelo_entorno.h5'
reward_model_path = '../data/models/modelo_reward.h5'


env = NNGridWorldEnv(maze, grid_model_path, reward_model_path)

# Try drawing the grid world.
# reset() returns an (observation, info) tuple, so unpack it rather than
# binding the whole tuple to `obs`.
obs, info = env.reset()
env.render()

Loading models...
Instructions for updating:
If using Keras pass *_constraint arguments to layers.


Models loaded
..#.G
..#..
.....
..#..
S.#..


In [11]:
from stable_baselines import DQN, PPO2, A2C, ACKTR 

# Build a DQN agent with the MLP policy on the grid-world env and train it
# for 100000 timesteps; `model` holds the value returned by learn().
model = DQN(policy='MlpPolicy', env=env, verbose=1).learn(total_timesteps=100000)

AttributeError: module 'contextlib' has no attribute 'nullcontext'