In [1]:
import json

In [2]:
# Load the sample MDP (stored as JSON) so its structure can be explored
# in the cells that follow.
with open("sample.json", "r") as sample_fp:
    data = json.loads(sample_fp.read())

In [3]:
# Show the whole parsed document: a top-level 'gamma' value (presumably the
# MDP discount factor) and a list of 'states'.
data

{'gamma': 0.75,
 'states': [{'actions': [{'id': 0,
     'transitions': [{'id': 0, 'probability': 0.5, 'reward': 1, 'to': 2},
      {'id': 1, 'probability': 0.5, 'reward': 0, 'to': 1}]},
    {'id': 1,
     'transitions': [{'id': 0, 'probability': 1.0, 'reward': 1, 'to': 0}]}],
   'id': 0},
  {'actions': [{'id': 0,
     'transitions': [{'id': 0, 'probability': 1, 'reward': 0, 'to': 1}]},
    {'id': 1,
     'transitions': [{'id': 0, 'probability': 1, 'reward': 0, 'to': 1}]}],
   'id': 1},
  {'actions': [{'id': 0,
     'transitions': [{'id': 0, 'probability': 0.9, 'reward': 0, 'to': 0},
      {'id': 1, 'probability': 0.1, 'reward': 0, 'to': 2}]},
    {'id': 1,
     'transitions': [{'id': 0, 'probability': 1, 'reward': 0, 'to': 1}]}],
   'id': 2}]}

In [4]:
# List the top-level keys of a single state entry.
data['states'][0].keys()

dict_keys(['id', 'actions'])

In [5]:
# Confirm that a state entry is a plain dict.
type(data['states'][0])

dict

In [6]:
# Inspect the first state in full: its 'id' and its list of 'actions'.
data['states'][0]

{'actions': [{'id': 0,
   'transitions': [{'id': 0, 'probability': 0.5, 'reward': 1, 'to': 2},
    {'id': 1, 'probability': 0.5, 'reward': 0, 'to': 1}]},
  {'id': 1,
   'transitions': [{'id': 0, 'probability': 1.0, 'reward': 1, 'to': 0}]}],
 'id': 0}

In [7]:
# The actions of a state are stored as a list.
type(data['states'][0]['actions'])

list

In [8]:
# The first state has exactly two actions (ids 0 and 1); the other states
# in the sample shown above follow the same pattern.
data['states'][0]['actions']

[{'id': 0,
  'transitions': [{'id': 0, 'probability': 0.5, 'reward': 1, 'to': 2},
   {'id': 1, 'probability': 0.5, 'reward': 0, 'to': 1}]},
 {'id': 1,
  'transitions': [{'id': 0, 'probability': 1.0, 'reward': 1, 'to': 0}]}]

In [9]:
# Each action entry is itself a dict.
type(data['states'][0]['actions'][0])

dict

In [10]:
# An action carries an 'id' and a list of possible 'transitions'.
data['states'][0]['actions'][0]

{'id': 0,
 'transitions': [{'id': 0, 'probability': 0.5, 'reward': 1, 'to': 2},
  {'id': 1, 'probability': 0.5, 'reward': 0, 'to': 1}]}

In [11]:
# The action id is a plain integer.
data['states'][0]['actions'][0]['id']

0

In [12]:
# Transitions of action 0 in state 0; their probabilities (0.5 + 0.5) sum to 1.
data['states'][0]['actions'][0]['transitions']

[{'id': 0, 'probability': 0.5, 'reward': 1, 'to': 2},
 {'id': 1, 'probability': 0.5, 'reward': 0, 'to': 1}]

In [13]:
# A transition has an 'id', a 'probability', a 'reward' and a 'to' field
# (the id of the destination state).
data['states'][0]['actions'][0]['transitions'][0]

{'id': 0, 'probability': 0.5, 'reward': 1, 'to': 2}

In [14]:
# Probabilities are stored as plain numbers (a float here).
data['states'][0]['actions'][0]['transitions'][0]['probability']

0.5

In [15]:
# Smaller example modelled on figure 2.3 of the paper
# "On the Complexity of Solving Markov Decision Problems".
#
# Builds states 1..6 using the same schema as sample.json:
#   {'gamma': ..., 'states': [{'id', 'actions': [{'id', 'transitions': [...]}]}]}
# States 0, 7, 8 and 9 are appended by the cells that follow.
dataset = {'gamma': 0.75, 'states': []}

for i in range(1, 7):
    actions = []
    # Every state offers exactly two actions (ids 0 and 1).
    for j in range(2):
        if i % 2 == 1:
            # Odd state: action j moves deterministically to state i + j + 2.
            transitions = [
                {'id': 0, 'probability': 1, 'reward': 0, 'to': i + j + 2},
            ]
        else:
            # Even state: either action lands on state i + 1 or i + 2 with
            # equal probability. Transition ids are numbered 0, 1 within the
            # action (as in sample.json) instead of all being 0.
            transitions = [
                {'id': k, 'probability': 0.5, 'reward': 0, 'to': i + k + 1}
                for k in range(2)
            ]
        actions.append({'id': j, 'transitions': transitions})
    dataset['states'].append({'id': i, 'actions': actions})

In [16]:
# State 0: action 0 leads to state 1 and action 1 leads to state 2,
# each with probability 1 and zero reward.
state_0 = {
    'actions': [
        {'id': 0,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 0, 'to': 1}]},
        {'id': 1,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 0, 'to': 2}]},
    ],
    'id': 0,
}
dataset['states'].append(state_0)

In [17]:
# State 7: both actions return to state 5 with probability 1 and reward 1.
state_7 = {
    'actions': [
        {'id': 0,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 1, 'to': 5}]},
        {'id': 1,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 1, 'to': 5}]},
    ],
    'id': 7,
}
dataset['states'].append(state_7)

In [18]:
# State 8: action 0 goes back to state 6 and earns reward 1;
# action 1 moves on to state 9 with no reward.
state_8 = {
    'actions': [
        {'id': 0,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 1, 'to': 6}]},
        {'id': 1,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 0, 'to': 9}]},
    ],
    'id': 8,
}
dataset['states'].append(state_8)

In [19]:
# Terminal state (id 9): both actions loop back onto state 9 itself
# with probability 1 and reward 1.
state_9 = {
    'actions': [
        {'id': 0,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 1, 'to': 9}]},
        {'id': 1,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 1, 'to': 9}]},
    ],
    'id': 9,
}
dataset['states'].append(state_9)

In [20]:
# Persist the figure-2.3 MDP to disk as JSON.
with open("figure_2_3.json", mode="w") as out_fp:
    json.dump(dataset, out_fp)

Figure 2.3 Modification
<img src="figure_2_3.png">

In [21]:
# Goal: an MDP that takes 15-16 iterations of PI (presumably policy iteration).
# The trick is to offer transitions with a short-term benefit that lose the
# ultimately better path: the cells below add rewarded loops back to earlier
# states, e.g. 28-->26 and 27-->25.
#
# Builds states 1..26 using the same schema as sample.json; states 0, 27, 28
# and 29 are appended by the cells that follow.
dataset = {'gamma': 0.75, 'states': []}

for i in range(1, 27):
    actions = []
    # Every state offers exactly two actions (ids 0 and 1).
    for j in range(2):
        if i % 2 == 1:
            # Odd state: action j moves deterministically to state i + j + 2.
            transitions = [
                {'id': 0, 'probability': 1, 'reward': 0, 'to': i + j + 2},
            ]
        else:
            # Even state: either action lands on state i + 1 or i + 2 with
            # equal probability. Transition ids are numbered 0, 1 within the
            # action (as in sample.json) instead of all being 0.
            transitions = [
                {'id': k, 'probability': 0.5, 'reward': 0, 'to': i + k + 1}
                for k in range(2)
            ]
        actions.append({'id': j, 'transitions': transitions})
    dataset['states'].append({'id': i, 'actions': actions})

In [22]:
# State 0: action 0 leads to state 1 and action 1 leads to state 2,
# each with probability 1 and zero reward.
state_0 = {
    'actions': [
        {'id': 0,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 0, 'to': 1}]},
        {'id': 1,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 0, 'to': 2}]},
    ],
    'id': 0,
}
dataset['states'].append(state_0)

In [23]:
# State 27: both actions loop back to state 25 with probability 1 and reward 1.
state_27 = {
    'actions': [
        {'id': 0,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 1, 'to': 25}]},
        {'id': 1,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 1, 'to': 25}]},
    ],
    'id': 27,
}
dataset['states'].append(state_27)

In [24]:
# State 28: action 0 is the rewarded path back to state 26 (reward 1);
# action 1 moves on to state 29 with no reward.
state_28 = {
    'actions': [
        {'id': 0,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 1, 'to': 26}]},
        {'id': 1,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 0, 'to': 29}]},
    ],
    'id': 28,
}
dataset['states'].append(state_28)

In [25]:
# Terminal state 29: both actions loop back onto state 29 itself
# with probability 1 and reward 1.
state_29 = {
    'actions': [
        {'id': 0,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 1, 'to': 29}]},
        {'id': 1,
         'transitions': [{'id': 0, 'probability': 1, 'reward': 1, 'to': 29}]},
    ],
    'id': 29,
}
dataset['states'].append(state_29)

In [26]:
# Persist the 30-state MDP (states 0..29) to disk as JSON.
with open("test.json", mode="w") as out_fp:
    json.dump(dataset, out_fp)

Graph HW3
<img src="submitted.png">