In [0]:
import random
from operator import add
# NOTE(review): presumably a convergence threshold for value updates; it is
# not referenced in the visible part of this file -- confirm against its users.
MIN_DELTA = 1e-4

class GridMarkovDP(object):
    def __init__(self, metadata):
        self.width = metadata['width']
        self.height = metadata['height']
        self.initial_value = metadata['initial_value']
        self.obstacles = metadata['obstacles']
        self.living_cost = metadata['living_cost']

        self.discount = metadata['discount']
        self.transition_distribution = metadata['transition_distribution']
        self.rewards = {tuple(terminal['state']) : terminal['reward'] for terminal in metadata['terminals']}
        self.terminals = list(self.rewards.keys())

        self._init_grid()

        # enumerate state space
        self.states = set()
        for row in range(self.height):
            for col in range(self.width):
                if self.grid[row][col] is not None:
                    self.states.add((row, col))
        
        # move one tile at a time
        self.actions = [(1, 0), (0, 1), (-1, 0), (0, -1)]
        self.num_actions = len(self.actions)

        # initialize values and policy
        self.policy = {}
        self.values = {}
        for state in self.states:
            self.values[state] = self.initial_value
            self.policy[state] = random.choice(self.actions)

    def R(self, state):
        if state in self.terminals:
            return self.rewards[state]
        else:
            # living cost
            return self.living_cost