In [2]:
import numpy as np
import json

In [71]:
# Load the problem definition. JSON text is UTF-8, so the encoding is given
# explicitly instead of relying on the platform's default locale encoding.
with open("input.json", "r", encoding="utf-8") as read_file:
    data = json.load(read_file)
data

{'disc': 1.0,
 'reward_function': {'r': 1.0, 'r_B': [], 'r_T': [-1.0, 1.0]},
 'transition_rates': {'p1': 0.8, 'p2': 0.1, 'p3': 0.1},
 'world': {'size': {'M': 3, 'N': 4},
  'states': {'B': [], 'F': [[2, 2]], 'S': [1, 1], 'T': [[4, 2], [4, 3]]}}}

In [72]:
# NOTE: a stale call `World(**data['world']['size'], **data['world']['states'])`
# used to live in this cell. `World` is only defined in a later cell and its
# constructor takes no arguments (it reads everything from the module-level
# `MDP`), so the call could not run top to bottom on a fresh kernel.
# The world is built with `w = World()` after both classes and `MDP` exist.

In [88]:
class MarkovDecisionProcess:
    """Container for the parameters of the grid-world decision process.

    Every argument is keyword-only, so the nested JSON configuration can be
    unpacked straight into the constructor.

    Args:
        N, M: Grid extent; valid coordinates are 1..N along x and 1..M along y.
        S: Start coordinate as an (x, y) pair.
        T: Coordinates marked as kind 'T'; paired position-wise with `r_T`.
        F: Coordinates marked as kind 'F'; `State.neighbours` never returns them.
        B: Coordinates marked as kind 'B'; paired position-wise with `r_B`.
        p1, p2, p3: Transition rates, stored exactly as given (not validated).
        r: Reward used for a state when no other reward is assigned.
        r_T: Rewards for the coordinates in `T`, in the same order.
        r_B: Rewards for the coordinates in `B`, in the same order.
        disc: Discounting factor.
    """

    def __init__(self, *, N, M, S, T, F, B, p1, p2, p3, r, r_T, r_B, disc):
        self.N, self.M = N, M
        # JSON delivers coordinates as lists. They are stored as tuples so that
        # equality and membership tests against (x, y) tuples work; this now
        # includes the start coordinate, which used to stay a list.
        self.S = tuple(S)
        self.T = self.lists_to_tuples(T)
        self.F = self.lists_to_tuples(F)
        self.B = self.lists_to_tuples(B)
        self.p1, self.p2, self.p3 = p1, p2, p3
        self.r, self.r_T, self.r_B = r, r_T, r_B
        self.disc = disc  # discounting

    def lists_to_tuples(self, I):
        """Return the two-element items of `I` as a list of (a, b) tuples.

        Raises:
            ValueError: If an item does not unpack into exactly two values.
        """
        return [(a, b) for a, b in I]

# Flatten the nested JSON configuration into the constructor's keyword-only
# arguments; every section contributes a disjoint set of parameter names.
MDP = MarkovDecisionProcess(
    disc=data['disc'],
    **data['reward_function'],
    **data['transition_rates'],
    **data['world']['states'],
    **data['world']['size'],
)

In [89]:
class World:
    """Grid of `State` cells built from the module-level `MDP` configuration.

    Cells are addressed with 1-based (x, y) coordinates: x runs over 1..MDP.N
    and y over 1..MDP.M.
    """

    def __init__(self):
        """Create every cell, then mark the start, 'T', 'B' and 'F' cells.

        Raises:
            ValueError: If `MDP.T` and `MDP.r_T` (or `MDP.B` and `MDP.r_B`)
                differ in length; pairing them would otherwise silently drop
                the unmatched entries.
            IndexError: If a configured coordinate lies outside the grid.
        """
        self.grid = [[State(x, y) for y in range(1, MDP.M + 1)]
                     for x in range(1, MDP.N + 1)]

        self.get_state(*MDP.S).kind = 'S'

        # 'T' and 'B' cells each carry their own reward, taken position-wise
        # from the matching reward list.
        for kind, coords, rewards in (('T', MDP.T, MDP.r_T),
                                      ('B', MDP.B, MDP.r_B)):
            if len(coords) != len(rewards):
                raise ValueError(
                    "{} coordinates but {} rewards for kind {!r}".format(
                        len(coords), len(rewards), kind))
            for coord, reward in zip(coords, rewards):
                cell = self.get_state(*coord)
                cell.kind = kind
                cell.reward = reward

        for f in MDP.F:
            self.get_state(*f).kind = 'F'

    def get_state(self, x, y):
        """Return the cell at 1-based coordinates (x, y).

        Raises:
            IndexError: If (x, y) is outside the grid. Without the explicit
                check, 0 or a negative coordinate would become a negative list
                index and silently return a cell from the opposite edge.
        """
        if not 1 <= x <= len(self.grid) or not 1 <= y <= len(self.grid[x - 1]):
            raise IndexError("state ({}, {}) is outside the grid".format(x, y))
        return self.grid[x - 1][y - 1]

    def print_usability(self):
        """Placeholder: not implemented yet, does nothing."""
        pass

    def print_policy(self):
        """Placeholder: not implemented yet, does nothing."""
        pass
    
    
class State:
    """A single grid cell, addressed by 1-based (x, y) coordinates.

    Attributes are set in `__init__`: `x`, `y`, `kind`, `reward`,
    `usability` and `policy`.
    """

    def __init__(self, x, y, kind="N", reward=None):
        """Create a cell.

        Args:
            x, y: Coordinates of the cell.
            kind: Cell marker; "N" by default, the world sets S/T/F/B.
            reward: Reward of the cell. When omitted (None), the current
                `MDP.r` is used. It is looked up on every call rather than
                once at class-definition time, so re-creating `MDP` takes
                effect without re-running the class cell.
        """
        self.x, self.y = x, y
        self.kind = kind  # S/T/F/B
        self.reward = MDP.r if reward is None else reward
        self.usability = 1.  # initial value; presumably the utility estimate - TODO confirm
        self.policy = ""

    def is_valid(self, x, y):
        """Return True if (x, y) lies inside the grid bounds 1..MDP.N by 1..MDP.M."""
        return 1 <= x <= MDP.N and 1 <= y <= MDP.M

    def neighbours(self):
        """Return coordinates of adjacent cells that are on the grid and not in `MDP.F`.

        All eight surrounding offsets are considered, diagonals included.
        NOTE(review): `move` only names U/D/L/R, so a 4-connected
        neighbourhood may have been intended - confirm before relying on this.
        """
        offsets = (-1, 0, 1)
        candidates = [(self.x + dx, self.y + dy)
                      for dx in offsets for dy in offsets
                      if (dx, dy) != (0, 0)]
        return [c for c in candidates
                if self.is_valid(*c) and c not in MDP.F]

    def move(self, d):
        """Placeholder for a move in direction `d` ("U", "D", "L" or "R").

        Not implemented yet: every direction is a no-op and None is returned.
        """
        # TODO: implement the four directions.
        if d == "U":
            pass
        elif d == "D":
            pass
        elif d == "L":
            pass
        elif d == "R":
            pass

In [90]:
# Build the world from the current MDP and show the neighbouring cells of (1, 1).
w = World()
w.get_state(x=1, y=1).neighbours()

[(1, 2), (2, 1)]