In [42]:
import numpy as np
from gym import spaces

In [43]:
# One-character markers stored in the cells of the environment's map array.
WALL_ID = "#"    # wall cell
BLANK_ID = " "   # empty cell
END_ID = "o"     # goal cell
ENEMY_ID = "x"   # cell holding an enemy
PLAYER_ID = "p"  # marker reserved for the player

class Environment:
    """Grid world stored as a 2-D array of one-character cell markers.

    The map has a border of wall cells, one goal cell at ``endposition`` and
    a number of randomly placed enemies.  Positions are ``(row, col)`` tuples;
    the ``x``/``y`` parameters of the methods below are row and column, in
    that order, because they index the map as ``map[x, y]``.

    Typical use: call ``reset()`` to pick a start cell, then call
    ``step(action_index)`` repeatedly until the returned ``done`` flag is true.
    """

    # (row delta, col delta) applied by each action name in ``action_space``.
    _MOVES = {
        "up": (-1, 0),
        "down": (1, 0),
        "left": (0, -1),
        "right": (0, 1),
    }

    def __init__(self, name: str = "CustomEnvironment"):
        """Build a 10x10 map with 7 random enemies and the goal at (8, 8).

        Args:
            name: Free-form label stored on the instance.
        """
        self.name = name
        self.mapsize = (10, 10)  # scalable in future
        self.nrows, self.ncols = self.mapsize
        self.endposition = (8, 8)
        self.action_space = ['up', 'down', 'left', 'right']
        self.action_size = len(self.action_space)
        self.state_size = self.nrows * self.ncols
        self.state_space = spaces.Discrete(self.state_size)
        self.map = self._generate_random_map(7)
        # Current (row, col) of the agent; stays None until reset() is called.
        self.state = None

    def _generate_random_map(self, nenemies: int):
        """Return a fresh map with walls, the goal and ``nenemies`` enemies.

        Args:
            nenemies: Number of enemy cells to place on blank cells.

        Returns:
            Array of shape ``self.mapsize`` filled with cell markers.

        Raises:
            ValueError: If there are fewer blank cells than ``nenemies``.
        """
        envmap = np.full(self.mapsize, BLANK_ID)
        # First/last row and first/last column form the wall border.
        envmap[[0, -1], :] = WALL_ID
        envmap[:, [0, -1]] = WALL_ID
        envmap[self.endposition] = END_ID

        # Without this guard the rejection loop below could never terminate.
        free_cells = int(np.count_nonzero(envmap == BLANK_ID))
        if nenemies > free_cells:
            raise ValueError(
                f"cannot place {nenemies} enemies on {free_cells} blank cells"
            )

        for _ in range(nenemies):
            # Rejection sampling: redraw until a blank cell is hit.  Walls,
            # the goal and earlier enemies are not blank, so they are skipped.
            while True:
                x = np.random.randint(0, self.nrows)
                y = np.random.randint(0, self.ncols)
                if envmap[x, y] == BLANK_ID:
                    break
            envmap[x, y] = ENEMY_ID
        return envmap

    def is_wall(self, x, y) -> bool:
        """Return True if map cell (row ``x``, col ``y``) is a wall."""
        return bool(self.map[x, y] == WALL_ID)

    def is_enemy(self, x, y) -> bool:
        """Return True if map cell (row ``x``, col ``y``) holds an enemy."""
        return bool(self.map[x, y] == ENEMY_ID)

    def is_end(self, x, y) -> bool:
        """Return True if map cell (row ``x``, col ``y``) is the goal."""
        return bool(self.map[x, y] == END_ID)

    def show_map(self):
        """Print the map array, if one has been generated."""
        if self.map is not None:
            print(self.map)

    def to_state(self, row, col):
        """Flatten ``(row, col)`` into a single row-major index."""
        return row * self.ncols + col

    def step(self, action):
        """Apply one action and move the agent to the resulting cell.

        Args:
            action: Index into ``self.action_space``.

        Returns:
            ``((row, col), reward, done, False)``.  The reward is -100 for a
            wall, -60 for an enemy, 200 for the goal and -1 otherwise;
            ``done`` is true when the new cell is a wall or the goal.  The
            trailing ``False`` is a placeholder kept so the result is a
            4-tuple shaped like a gym-style step result.

        Raises:
            RuntimeError: If ``reset()`` has not been called yet.
        """
        if self.state is None:
            raise RuntimeError("reset() must be called before step()")

        oldx, oldy = self.state
        x, y = self.move(oldx, oldy, action)

        hit_wall = self.is_wall(x, y)
        reached_end = self.is_end(x, y)
        if hit_wall:
            reward = -100
        elif self.is_enemy(x, y):
            reward = -60
        elif reached_end:
            reward = 200
        else:
            reward = -1
        done = hit_wall or reached_end

        # Remember the new position so the next step continues from it.
        self.state = (x, y)
        return (x, y), reward, done, False  # False is useless, just to match gym returns

    def reset(self):
        """Place the agent on a random blank cell and return its position.

        Only blank cells are eligible, so an episode never starts on a wall,
        an enemy or the goal.

        Returns:
            The new ``(row, col)`` state.

        Raises:
            RuntimeError: If the map contains no blank cell.
        """
        blank_cells = np.argwhere(self.map == BLANK_ID)
        if len(blank_cells) == 0:
            raise RuntimeError("map has no blank cell to start from")
        row, col = blank_cells[np.random.randint(len(blank_cells))]
        self.state = (int(row), int(col))
        return self.state

    def move(self, x, y, actionindex):
        """Return the cell reached from (row ``x``, col ``y``) by an action.

        The result is clamped to the map bounds, so moving off an edge leaves
        the position on that edge.  This does not modify ``self.state``.

        Args:
            x: Current row.
            y: Current column.
            actionindex: Index into ``self.action_space``.

        Returns:
            The ``(row, col)`` after the move.
        """
        d_row, d_col = self._MOVES[self.action_space[actionindex]]
        row = min(max(x + d_row, 0), self.nrows - 1)
        col = min(max(y + d_col, 0), self.ncols - 1)
        return (row, col)
            
    
        
        

In [44]:
# Instantiate the environment under its default name.
e = Environment(name="CustomEnvironment")