In [260]:
import numpy as np 


In [261]:
class Gridworld:

    def __init__(self, shape=(10,10), num_negative_tiles=0, starting_point=(1,1),goal=(8,8),num_walls=4):


        self.size = shape
        self.num_negative_tiles = num_negative_tiles
        self.current_state = starting_point
        self.grid = np.zeros(shape, dtype=np.int64)
        self.stochastic_transistions = np.zeros(shape=(shape[0]-1,shape[1]-1), dtype=np.float32)
        self.goal = np.array(goal)
        self.acc_reward = 0 

        # set values for entries in gridworld (rewards, goal, penalties) non-doable tiles should yield -100
        self.grid[goal[0]][goal[1]] = 100


        # setting borders
        self.grid[0,:] = -100
        self.grid[shape[1]-1,:] = -100
        self.grid[:,0] = -100
        self.grid[:,shape[1]-1] = -100



        # implementing random negative tiles 
        # still need to check whether path to goal is free !!!!
        # check if indexes are unique
        unique_wall_indexes = False
        while not unique_wall_indexes:
            random_walls = np.random.randint(1,shape[0]-1,size=(num_walls,2))
            random_walls_unique = np.unique(random_walls,axis=0)
            print(goal)
            # check that the indeces neither describe the starting point nor the goal state or are doubled
            if len(random_walls) == len(random_walls_unique) and not goal in random_walls.tolist() and not starting_point in random_walls.tolist():
                unique_wall_indexes = True
        # apply for all unique indexes 
        for r in random_walls:
            self.grid[r[0],r[1]] = -100


        
    # reset the actor to starting state
    def reset(self):
        self.current_state = (1,1)
        self.acc_reward = 0



    def step(self,action):
        '''
        Args:
        action(): 0: right, 1: left, 2, up, 3, down
        throws error if move is invalid due to wall
        '''
        
        # anders ugly mit if elif statements, switch erst ab python 3.10
        # right
        if action == 0:
            # get current state 
            y, x = self.current_state
            # check that current state is accessable
            new_y,new_x = y, x+1
            if self.grid[new_y,new_x] != -100:
                # update current state and collect rewward
                self.current_state = ((new_y, new_x))
                self.acc_reward += self.grid[new_y,new_x]
            else:
                raise ValueError('Could not move there due to wall.')


        # left step
        elif action == 1:
            # get current state 
            y, x = self.current_state
            # check that current state is accessable
            new_y,new_x = y, x-1
            if self.grid[new_y,new_x] != -100:
                # update current state and collect rewward
                self.current_state = ((new_y, new_x))
                self.acc_reward += self.grid[new_y,new_x]
            else:
                raise ValueError('Could not move there due to wall.')

        # upwards step
        elif action == 2:
            # get current state 
            y, x = self.current_state
            # check that current state is accessable
            new_y,new_x = y-1, x
            if self.grid[new_y,new_x] != -100:
                # update current state and collect rewward
                self.current_state = ((new_y, new_x))
                self.acc_reward += self.grid[new_y,new_x]
            else:
                raise ValueError('Could not move there due to wall.')
        
        # downwards step
        elif action == 3:
            # get current state 
            y, x = self.current_state
            # check that current state is accessable
            new_y,new_x = y+1, x
            if self.grid[new_y,new_x] != -100:
                # update current state and collect rewward
                self.current_state = ((new_y, new_x))
                self.acc_reward += self.grid[new_y,new_x]
            else:
                raise ValueError('Could not move there due to wall.')
        else:
            raise ValueError('Action index out of bounds. Actions-space = (0,1,2,3)')

        

    def visualize(self):
        print(self.grid)






In [262]:
grid = Gridworld()
grid.step(0)
grid.visualize()
print(grid.stochastic_transistions)

(8, 8)
[[-100 -100 -100 -100 -100 -100 -100 -100 -100 -100]
 [-100 -100    0    0    0    0    0    0    0 -100]
 [-100    0    0    0    0    0    0    0    0 -100]
 [-100    0    0    0    0    0    0    0    0 -100]
 [-100    0    0    0    0    0    0    0    0 -100]
 [-100    0    0    0    0    0 -100    0    0 -100]
 [-100    0    0    0    0    0    0    0    0 -100]
 [-100    0    0    0    0    0    0 -100    0 -100]
 [-100    0    0    0    0    0 -100    0  100 -100]
 [-100 -100 -100 -100 -100 -100 -100 -100 -100 -100]]
[[0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]
 [0. 0. 0. 0. 0. 0. 0. 0. 0.]]
