In [1]:
import util as ut

In [2]:
# Load the MDP collection from "2_mdps.pickle" through the project helper `ut.get_file`.
# NOTE(review): judging by the extension this is a pickle file — presumably the helper
# unpickles it; only load files from a trusted source (unpickling can execute code).
mdp = ut.get_file("2_mdps.pickle")
# `len(mdp)` is reported as the number of MDPs stored in the file.
print(f"File contains info for {len(mdp)} MDPs.")

File contains info for 2 MDPs.


In [3]:
# Select which MDP (flight) of the loaded collection to inspect.
flight_index = 0

# Element 1 of the selected entry carries the per-flight data; positions 1, 3 and 5
# of it are read as the costs, the actions and the transition probabilities.
flight_data = mdp[flight_index][1]
costs = flight_data[1]
actions = flight_data[3]
probabilities = flight_data[5]

In [4]:
# Time horizon: the number of entries in `costs`. The cells below use `range(T)`
# to index `actions[t]` and `probabilities[t]` for every time step t.
T = len(costs)
print(f"Time horizon: {T}")

Time horizon: 24


Find the maximum number of neighbors over all states and time steps; this determines the size of the action space:

In [5]:
# Largest neighbor list over every visible state at every time step.
# `default=0` keeps this from raising ValueError when a time step has no
# visible states (empty `actions[t]`) or when the horizon itself is empty.
max_num_neighbors = max(
    (len(neighbors) for t in range(T) for neighbors in actions[t].values()),
    default=0,
)
print(f"Max number of neighbors: {max_num_neighbors}")

# The action space is sized by the largest neighbor list.
numActions = max_num_neighbors

Max number of neighbors: 75


Compute the set of states (all visible states for all time steps, with duplicates removed). The state ids are not contiguous (some values are missing), so map them to the contiguous range 0 to n-1 in order to reduce the matrix size.

In [6]:
# Find all visible states for all time steps (the keys of every `actions[t]`),
# with duplicates removed.
states = {state for state_at_time in actions for state in state_at_time.keys()}

# Build both lookup directions from a single enumeration of `states`, so the
# two maps are inverse of each other by construction:
#   i2s: compact index (0..n-1) -> original state id
#   s2i: original state id      -> compact index
# Indices follow the iteration order of the `states` set.
i2s = dict(enumerate(states))
s2i = {state: index for index, state in i2s.items()}

# Sanity check: the round trip must hold for every state, not just for one
# hard-coded id (which would raise KeyError on a data set lacking that id).
all(i2s[s2i[state]] == state for state in states)

True

Data set checks

In [7]:
# Data set consistency: for each time step and each visible state there is one
# action per neighbor, and the transition row of every (state, action) pair must
# contain exactly one entry per neighbor (#transitions == #neighbors == #actions).
for t in range(T):
    for state, neighbors in actions[t].items():
        num_neighbors = len(neighbors)
        for action in range(num_neighbors):
            transition_row = probabilities[t][(state, action)]
            assert len(transition_row) == num_neighbors

In [8]:
# Every transition row must sum to 1, up to a tolerance of 1e-6 to absorb
# floating-point rounding.
for t in range(T):
    for state, neighbors in actions[t].items():
        for action in range(len(neighbors)):
            row_total = sum(probabilities[t][(state, action)])
            assert abs(row_total - 1) < 1e-6

In [9]:
# Worked example for state 13558, action 0, at time step 0.

# Row of the transition matrix for that (state, action) pair.
example_row = probabilities[0][(13558, 0)]
print(example_row)

# Neighbors of state 13558; entry i of the row belongs to neighbor i.
example_neighbors = actions[0][13558]
print(example_neighbors)

# Show where each probability would land in the non-flattened transition
# probability tensor: the column is the compact index of the neighbor state.
for position, neighbor in enumerate(example_neighbors):
    print(f"insert {example_row[position]} at index {s2i[neighbor]}")

[0.95, 0, 0, 0.050000000000000044, 0, 0, 0, 0, 0, 0, 0, 0, 0]
[11522, 11524, 15241, 13558, 11523, 15240, 10, 14, 13547, 13546, 13548, 12, 15242]
insert 0.95 at index 8362
insert 0 at index 8364
insert 0 at index 10862
insert 0.050000000000000044 at index 9704
insert 0 at index 8363
insert 0 at index 10861
insert 0 at index 8
insert 0 at index 12
insert 0 at index 9694
insert 0 at index 9693
insert 0 at index 9695
insert 0 at index 10
insert 0 at index 10863
