# Postprocessing of the computed optimal policy

In [10]:
import util as ut
import numpy as np

In [11]:
# Load the stored MDP data; the result is indexed per flight (`mdp[flight_index]`) further down.
# NOTE(review): the file name suggests a pickle — only load pickles that come from a trusted source.
mdp = ut.get_file("2_mdps.pickle")
print(f"File contains info for {len(mdp)} flights.")

File contains info for 2 flights.


In [12]:
# Select one flight and pull out the parts of its MDP that this notebook uses.
# NOTE(review): the positional indices [1][1], [1][3] and [1][5] depend on the layout of the
# stored data, which is not visible here — confirm against the code that wrote the file.
flight_index = 0
costs = mdp[flight_index][1][1]  # its length is used as the time horizon T below
actions = mdp[flight_index][1][3]  # per time step: dict mapping a state to a list that is indexed by action below
probabilities = mdp[flight_index][1][5]  # not used in the visible part of this notebook

In [13]:
# Obtain the problem dimensions from the loaded MDP data.
T = len(costs)  # time horizon = number of entries in `costs`
print(f"Time horizon: {T}")

# The number of actions needed is the length of the longest action list over all
# time steps and states. `default=0` keeps this well-defined for a time step
# without any visible state (a bare max() over an empty list raises ValueError).
max_num_neighbors = 0
for t in range(T):
    list_lengths = [len(lst) for lst in actions[t].values()]
    max_num_neighbors = max(max_num_neighbors, max(list_lengths, default=0))
print(f"Max number of neighbors (number of actions needed): {max_num_neighbors}")
numActions = max_num_neighbors

# Find all visible states for all time steps
# NOTE(review): each state's index follows the iteration order of the set. It is left
# unsorted on purpose, because the policy file is presumably indexed with this same
# ordering — confirm against the code that produced it before changing it.
states = set(key for state_at_time in actions for key in state_at_time.keys())
s2i = {value: index for index, value in enumerate(states)}  # state id -> index
i2s = {index: value for index, value in enumerate(states)}  # index -> state id
# Sanity check: the two mappings must be inverses of each other for every state.
# (A bare `==` expression in the middle of a cell is evaluated and discarded, so it
# has to be an assert to actually check anything.)
assert all(i2s[s2i[state]] == state for state in states), "s2i and i2s are not inverse mappings"
numStates = len(states)
print(f"Number of actual states: {numStates}")
print(f"Number of states in infinite horizon MDP (S*T): {numStates*T}")

Time horizon: 24
Max number of neighbors (number of actions needed): 75
Number of actual states: 10867
Number of states in infinite horizon MDP (S*T): 260808


Load policy file

In [14]:
# Integer array of action indices; it is looked up further down at position
# global_col_P(t, s2i[s]) for time step t and state s.
policy_data = np.loadtxt("out/policy.out", dtype=int)

Mappings $\mathcal{T} \times \mathcal{S} \leftrightarrow \{0, \dots, (S\cdot T) - 1\}$

In [15]:
# Col mappings for flattened transition probability tensor (also row mapping for stage cost matrix)
def global_col_P(time, state, num_states=None):
    """Map a (time, state index) pair to the flat index ``time * num_states + state``.

    Parameters
    ----------
    time : int
        Time step.
    state : int
        State index (the notebook passes ``s2i[s]``, not the raw state id).
    num_states : int, optional
        Number of states per time step. When omitted, the notebook-level
        ``numStates`` is used, which matches the previous behaviour.

    Returns
    -------
    int
        The flat index.
    """
    if num_states is None:
        # Fall back to the notebook global so existing two-argument calls keep working.
        num_states = numStates
    return time * num_states + state

def inv_global_col_P(index, num_states=None):
    """Split a flat index into its ``(time, state)`` pair.

    This undoes the mapping ``index = time * num_states + state``.

    Parameters
    ----------
    index : int
        Flat index.
    num_states : int, optional
        Number of states per time step. When omitted, the notebook-level
        ``numStates`` is used, which matches the previous behaviour.

    Returns
    -------
    tuple
        ``(time, state)`` with ``time = index // num_states`` and
        ``state = index % num_states``.
    """
    if num_states is None:
        # Fall back to the notebook global so existing one-argument calls keep working.
        num_states = numStates
    # divmod yields (index // num_states, index % num_states) in a single step.
    time, state = divmod(index, num_states)
    return time, state

#### Extract policy for visible states for each timestep
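The result is stored as two lists of dictionaries, used as `policy_a[t][s]` (the chosen action index) and `policy_s[t][s]` (the state id selected by that action), where `t` is the timestep and `s` must be an element of `actions[t].keys()`.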

In [16]:
policy_a = [] # contains action indices for each (t, s)
policy_s = [] # contains state id for each (t, s)

# Build one dictionary per time step, keyed by the states visible at that step.
for t in range(T):
    policy_a.append({})
    policy_s.append({})
    visible_states = actions[t].keys()
    for s in visible_states:
        # Look up the action index stored for (t, s) in the flat policy array.
        policy_a[t][s] = policy_data[global_col_P(t, s2i[s])]
        # Sanity check that the action index is valid. The lower bound matters too:
        # a negative index would not fail but silently pick an entry from the end of the list.
        assert 0 <= policy_a[t][s] < len(actions[t][s]), (
            f"invalid action index {policy_a[t][s]} for state {s} at time {t}"
        )
        # Translate the action index into the entry of the action list it selects.
        policy_s[t][s] = actions[t][s][policy_a[t][s]]


In [17]:
# Show the chosen action index for every visible state at the first time step (t = 0).
policy_a[0]

{13558: 10,
 13546: 1,
 13547: 0,
 13548: 0,
 11522: 3,
 11523: 2,
 11524: 1,
 10: 2,
 12: 1,
 14: 2,
 15240: 1,
 15241: 1,
 15242: 1,
 5669: 32,
 5670: 30,
 5671: 9,
 13556: 0,
 13557: 1,
 5672: 10,
 13559: 6,
 5673: 15,
 13560: 6,
 2622: 7,
 2623: 0,
 2624: 2,
 10091: 10,
 10092: 1,
 10093: 3,
 11785: 0,
 11786: 3,
 11787: 3,
 7638: 11,
 7639: 1,
 7640: 0,
 2625: 2,
 10094: 3,
 11788: 2,
 7641: 0,
 2626: 12,
 10095: 15,
 11789: 0,
 7642: 6,
 9: 2,
 11: 3,
 13: 5,
 361: 1,
 363: 3,
 365: 4,
 26: 3,
 27: 6,
 28: 7,
 11565: 3,
 11566: 6,
 11567: 6,
 15: 4,
 367: 3,
 29: 5,
 11568: 5,
 17: 3,
 369: 1,
 30: 2,
 11569: 3,
 5650: 0,
 5652: 10,
 5654: 9,
 2214: 23,
 2215: 16,
 2216: 27,
 10794: 0,
 10795: 5,
 10796: 4,
 10959: 18,
 10960: 13,
 10961: 16,
 13544: 0,
 13545: 2,
 13731: 0,
 13732: 5,
 13733: 4,
 9793: 0,
 9794: 10,
 9795: 9,
 7857: 24,
 7858: 20,
 7859: 23,
 1785: 0,
 1786: 15,
 1787: 14,
 1722: 0,
 1724: 15,
 1726: 14,
 14413: 16,
 14414: 9,
 14415: 15,
 12840: 0,
 12841: 5,
 

In [18]:
# Show the state id selected by the policy for every visible state at the first time step (t = 0).
policy_s[0]

{13558: 13548,
 13546: 13558,
 13547: 13558,
 13548: 13558,
 11522: 13558,
 11523: 13558,
 11524: 13558,
 10: 13558,
 12: 13558,
 14: 13558,
 15240: 13558,
 15241: 13558,
 15242: 13558,
 5669: 13546,
 5670: 13547,
 5671: 13735,
 13556: 11522,
 13557: 15241,
 5672: 13735,
 13559: 13548,
 5673: 13735,
 13560: 13548,
 2622: 11522,
 2623: 11523,
 2624: 11523,
 10091: 11522,
 10092: 11523,
 10093: 11523,
 11785: 11522,
 11786: 11523,
 11787: 11523,
 7638: 11522,
 7639: 11523,
 7640: 11523,
 2625: 11523,
 10094: 11523,
 11788: 11523,
 7641: 11523,
 2626: 11524,
 10095: 11524,
 11789: 11524,
 7642: 7861,
 9: 10,
 11: 12,
 13: 12,
 361: 10,
 363: 12,
 365: 12,
 26: 10,
 27: 12,
 28: 12,
 11565: 10,
 11566: 12,
 11567: 12,
 15: 12,
 367: 12,
 29: 12,
 11568: 12,
 17: 14,
 369: 14,
 30: 14,
 11569: 14,
 5650: 5669,
 5652: 5670,
 5654: 5671,
 2214: 5669,
 2215: 5670,
 2216: 5671,
 10794: 5669,
 10795: 5670,
 10796: 5671,
 10959: 5669,
 10960: 5670,
 10961: 5671,
 13544: 5669,
 13545: 13557,
 1373