# COMS4061A - Reinforcement Learning

## Markov Decision Processes

- Mamello Seboholi [1851317]

### Imports

In [1]:
import numpy as np

### Utils

In [2]:
def get_value(array, x, y, default = None):
    """Safely read ``array[x][y]`` from a 2-D sequence.

    Note the index order: ``x`` selects the outer element (the row) and ``y``
    selects the position inside that row (the column).

    Args:
        array: Sequence of sequences to read from.
        x: Index into ``array`` (row index).
        y: Index into ``array[x]`` (column index).
        default: Value returned when ``(x, y)`` is outside the grid.

    Returns:
        ``array[x][y]`` when both indices are in range, otherwise ``default``.
    """
    # Negative indices would silently wrap around in Python, so treat them
    # as out of bounds.
    if x < 0 or y < 0:
        return default

    if x >= len(array):
        return default

    row = array[x]

    # Bound the column index by the length of this row, not by the number of
    # rows, so non-square and ragged grids are handled correctly.
    if y >= len(row):
        return default

    return row[y]

### Action

In [20]:
class Action:
    """A named move described by one coordinate transform per axis.

    Attributes are set directly from the constructor arguments:
    ``x_modifier`` and ``y_modifier`` are callables that take a single
    coordinate and return the new one; ``desc`` is the label used when the
    action is converted to a string.
    """

    def __init__(self, x_modifier, y_modifier, desc = "Action"):
        self.x_modifier, self.y_modifier = x_modifier, y_modifier
        self.desc = desc

    def __str__(self):
        """Return the action's label."""
        return "{}".format(self.desc)

    def get_new_position(self, x, y):
        """Apply both modifiers and return the resulting ``(x, y)`` tuple."""
        moved_x = self.x_modifier(x)
        moved_y = self.y_modifier(y)
        return moved_x, moved_y

# The four cardinal moves. The y axis grows downwards: UP decrements y and
# DOWN increments it, while LEFT/RIGHT decrement/increment x.
UP = Action(x_modifier=lambda x: x, y_modifier=lambda y: y - 1, desc="UP")
RIGHT = Action(x_modifier=lambda x: x + 1, y_modifier=lambda y: y, desc="RIGHT")
DOWN = Action(x_modifier=lambda x: x, y_modifier=lambda y: y + 1, desc="DOWN")
LEFT = Action(x_modifier=lambda x: x - 1, y_modifier=lambda y: y, desc="LEFT")

### State

In [28]:
class State:
    """A grid position together with the actions that may be taken from it."""

    def __init__(self, x, y, actions):
        self.x, self.y = x, y
        self.actions = actions

    def __repr__(self):
        """Render as ``[x][y] actions: [...]`` with each action shown via ``str``."""
        labels = list(map(str, self.actions))
        return f"[{self.x}][{self.y}] actions: {labels}"

    def get_actions(self):
        """Return the actions object stored on this state (not a copy)."""
        return self.actions

### MDP

In [31]:
class MDP:
    """Grid-world MDP whose states are derived from a 2-D ``world`` grid.

    ``self.states`` is a list of rows; the entry at ``states[row][col]`` is a
    ``State`` with ``x == col`` and ``y == row``.
    """

    def __init__(self, world):
        self.states = MDP.get_states_from_world(world)

    def __str__(self):
        return "{}".format(self.states)

    def get_state(self, x, y):
        """Return the state at column ``x`` of row ``y``."""
        states_row = self.states[y]
        return states_row[x]

    @staticmethod
    def get_states_from_world(world):
        """Build one ``State`` per cell of ``world``.

        An action is offered from a cell when the neighbouring cell in that
        direction is inside the grid (as judged by ``get_value``) and its
        value is not ``-1``. Every cell gets a state, including cells whose
        own value is ``-1``. Actions are listed in the order UP, RIGHT,
        DOWN, LEFT.
        """
        blocked = -1
        # (action, row offset, column offset), probed in this order.
        probes = (
            (UP, -1, 0),
            (RIGHT, 0, 1),
            (DOWN, 1, 0),
            (LEFT, 0, -1),
        )

        grid = []
        for row_idx, cells in enumerate(world):
            grid_row = []
            for col_idx, _ in enumerate(cells):
                allowed = [
                    action
                    for action, d_row, d_col in probes
                    if get_value(world, row_idx + d_row, col_idx + d_col, blocked) != blocked
                ]
                grid_row.append(State(col_idx, row_idx, allowed))
            grid.append(grid_row)
        return grid

### World Map

- 7x7 grid: `0` marks an open cell and `-1` marks a cell that cannot be moved into

In [33]:
# Row 2 is blocked (-1) everywhere except its last column, which is the only
# passage between the two rows above it and the four rows below it.
world = [
    [ 0,  0,  0,  0,  0,  0, 0],
    [ 0,  0,  0,  0,  0,  0, 0],
    [-1, -1, -1, -1, -1, -1, 0],
    [ 0,  0,  0,  0,  0,  0, 0],
    [ 0,  0,  0,  0,  0,  0, 0],
    [ 0,  0,  0,  0,  0,  0, 0],
    [ 0,  0,  0,  0,  0,  0, 0],
]

mdp = MDP(world)

# Inspect the state in the last column of row 1, directly above the passage.
print(mdp.get_state(x=6, y=1))

[6][1] actions: ['UP', 'DOWN', 'LEFT']
