# Postprocessing of the computed optimal policy

In [34]:
import util as ut
import numpy as np

In [35]:
# Load the MDP data via the project helper; the container holds one entry per flight.
# NOTE(review): ut.get_file presumably unpickles the file -- only load files from a trusted source.
mdp = ut.get_file("2_mdps.pickle")
print(f"File contains info for {len(mdp)} flights.")

File contains info for 2 flights.


In [36]:
# Choose the flight to post-process and pull out its MDP components.
flight_index = 0
flight_mdp = mdp[flight_index][1]
# NOTE(review): positions 1, 3 and 5 are taken over unchanged; the record layout is
# defined wherever the pickle is written -- confirm there.
costs, actions, probabilities = flight_mdp[1], flight_mdp[3], flight_mdp[5]

In [37]:
# obtain parameters
# The time horizon is the number of entries in `costs`.
T = len(costs)
print(f"Time horizon: {T}")

# The number of actions needed is the longest successor list over all (t, state) pairs.
max_num_neighbors = 0
for t in range(T):
    list_lengths = [len(lst) for lst in actions[t].values()]
    # default=0 keeps a time step without any visible state from raising ValueError
    max_num_neighbors = max(max_num_neighbors, max(list_lengths, default=0))
print(f"Max number of neighbors (number of actions needed): {max_num_neighbors}")
numActions = max_num_neighbors

# Find all visible states for all time steps
states = set(key for state_at_time in actions for key in state_at_time.keys())
# NOTE(review): indices follow the iteration order of the set; this presumably has to
# match the ordering used when the policy file was produced -- confirm.
s2i = {value: index for index, value in enumerate(states)}
i2s = {index: value for index, value in enumerate(states)}
# sanity check: the two mappings must be mutual inverses for every state
# (a bare comparison would be evaluated and silently discarded, so assert it)
assert all(i2s[s2i[state]] == state for state in states), "s2i and i2s are not inverse mappings"
numStates = len(states)
print(f"Number of actual states: {numStates}")
print(f"Number of states in infinite horizon MDP (S*T): {numStates*T}")

Time horizon: 24
Max number of neighbors (number of actions needed): 75
Number of actual states: 10867
Number of states in infinite horizon MDP (S*T): 260808


Load policy file

In [38]:
# Read the policy file of the selected flight as an integer array; it is indexed
# further down with the global index global_col_P(t, state_index).
policy_data = np.loadtxt(f"out/{flight_index}_policy_NEW.out", dtype=int)

Mappings $\mathcal{T} \times \mathcal{S} \leftrightarrow \{0, \dots, (S\cdot T) - 1\}$

In [39]:
# Col mappings for flattened transition probability tensor (also row mapping for stage cost matrix)

def global_col_P(time, state):
    """Flatten a (time step, state index) pair into one global index.

    Each time step owns a contiguous block of ``numStates`` indices.
    """
    block_start = numStates * time
    return block_start + state

def inv_global_col_P(index):
    """Split a global index into its ``(time, state)`` pair.

    Undoes ``global_col_P`` for state indices in ``0 .. numStates - 1``.
    """
    time, state = divmod(index, numStates)
    return time, state

#### Extract policy for visible states for each timestep
as lists of dictionaries --> usage `policy_a[t][s]` (index of the chosen action) and `policy_s[t][s]` (id of the resulting successor state), where `t` is the timestep and `s` must be an element of `actions[t].keys()`

In [40]:
policy_a = [] # contains action indices for each (t, s)
policy_s = [] # contains state id for each (t, s)

# For every time step, look up the action chosen for each visible state and
# translate it into the successor state it selects.
for t in range(T):
    policy_a.append({})
    policy_s.append({})
    visible_states = actions[t].keys()

    for s in visible_states:
        action = policy_data[global_col_P(t, s2i[s])]
        # Check both bounds: a negative index would otherwise wrap around silently
        # and pick a successor from the end of the list.
        assert 0 <= action < len(actions[t][s]), (
            f"invalid action index {action} for state {s} at time {t}"
        )
        policy_a[t][s] = action
        policy_s[t][s] = actions[t][s][action]


In [41]:
# absorbing state

#### Generate flight trajectories

In [42]:
# Load the two lookup tables used to translate between numeric state ids and
# state names such as "VEULE_350" (id -> name and name -> id).
id2name = ut.get_file("num2node_dict.pickle")
name2id = ut.get_file("node2num_dict.pickle")

In [43]:
def generate_flight_trajectory(start_state_name):
    """Follow the extracted policy from a start state through all T time steps.

    Returns a list of T + 1 state names: the start state followed by the
    successor the policy selects at each time step.
    """
    trajectory = [start_state_name]
    current_name = start_state_name
    for step in range(T):
        current_id = name2id[current_name]
        current_name = id2name[policy_s[step][current_id]]
        trajectory.append(current_name)
    return trajectory

def print_trajectory(trajectory):
    """Print a trajectory as a table of time, waypoint and flight level.

    Each entry of `trajectory` must have the form "<waypoint>_<flight level>".
    At most the first T entries are printed; time is shown 1-based.
    """
    print("{:<7s}{:<10s}{:<10s}".format("Time", "Waypoint", "Flight level"))
    # Slicing instead of indexing avoids an IndexError on trajectories shorter than T.
    for t, state in enumerate(trajectory[:T]):
        # Split on the last underscore only, so a waypoint name that itself
        # contains "_" does not break the two-value unpacking.
        name, level = state.rsplit("_", 1)
        print("{:<7d}{:<10s}{:<10s}".format(t+1, name, level)) # print 1-based time index

In [47]:
# Start state of each flight; selecting by flight_index keeps the trajectory
# consistent with the policy file that was loaded for that flight.
start_states = {0: "VEULE_350", 1: "SITET_300"}
traj = generate_flight_trajectory(start_states[flight_index])
print(f"Optimal trajectory for flight {flight_index}:\n{32*'-'}")
print_trajectory(traj)

Optimal trajectory for flight 0:
--------------------------------
Time   Waypoint  Flight level
1      VEULE     350       
2      INPAX     350       
3      RESMI     350       
4      KOTAP     350       
5      KETEX     350       
6      KUSEK     350       
7      KOTIS     350       
8      KUKOR     350       
9      ADEKA     350       
10     TIS       350       
11     PIMAK     350       
12     VEROT     350       
13     AGREV     350       
14     MTL       350       
15     EVALA     300       
16     XIRBI     250       
17     DOTIG     200       
18     VEDIK     150       
19     AMFOU     150       
20     CUERS     100       
21     AMFOU     50        
22     TIPIK     50        
23     TIPIK     50        
24     TIPIK     50        


In [45]:
# Numeric id of state TIPIK_50 (the state in which the flight-0 trajectory printed above ends).
name2id["TIPIK_50"]

5413

In [46]:
# Inspect a single (time, state) pair: its id, its successor states and the
# transition probabilities stored for each of its actions.
time = 18
loc = "AMFOU_100"
loc_id = name2id[loc]
print(loc_id)
print(id2name[5413])

neighbor_ids = actions[time][loc_id]
print([id2name[i] for i in neighbor_ids])
nbrs = len(neighbor_ids)
print(f"Number of neighbors: {nbrs}")

for a in range(nbrs):
    print(f"Action {a}: probability {probabilities[time][(loc_id,a)]}")

5370
TIPIK_50
['TIPIK_100', 'TIPIK_150', 'MEDOK_50', 'UGLET_150', 'GILON_150', 'DGN_100', 'DGN_150', 'MEDOK_100', 'GILON_50', 'CUERS_50', 'GILON_100', 'PERUS_50', 'PERUS_100', 'FMD.A_150', 'UGLET_50', 'UGLET_100', 'DGN_50', 'RUBEB_100', 'FMD.A_50', 'TIPIK_50', 'CUERS_150', 'VEDIK_100', 'VEDIK_50', 'RUBEB_50', 'FMD.A_100', 'CUERS_100', 'VEDIK_150', 'RUBEB_150', 'MEDOK_150', 'PERUS_150']
Number of neighbors: 30
Action 0: probability [1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Action 1: probability [0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Action 2: probability [0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Action 3: probability [0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Action 4: probability [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
Action 5: probability 