## Network Simulation and Modeling

Goal: Simulate aircraft traversals over each of the two airline network models:
 
--> A hub and spoke based system

--> A point-to-point based system



In [1]:
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import random
from scipy.stats import gaussian_kde
from scipy.integrate import trapz
from scipy.stats import bernoulli


In [6]:
# Load the route-level edge table and the per-flight on-time performance data.
# Forward slashes keep these relative paths portable: the previous
# "Database\FINAL_..." literals contained the invalid escape sequence "\F"
# (flagged with a warning by recent Python versions) and only resolved on Windows.
edge_dataset = pd.read_csv("Database/FINAL_EDGE_TABLE.csv")
on_time_data = pd.read_csv("Database/FINAL_ON_TIME_DATA.csv")

def preprocess_data(df):
    '''
    Goal: Extract new edge attributes and build the directed airline network
    Parameters:
        df (pandas dataframe) input hub-spoke or point to point edge data. The columns
           'revenue', 'total_operating_expense', 'Proportion of Seats filled',
           'passengers', 'seats', 'Origin_Airport_Code', 'Destination_Airport_Code',
           'CANCELLED' and 'citypair' are read.
           Side effect: the columns 'net_profit' and 'Demand_prop' are added to df in place.
    Return:
        G (networkx DiGraph) directed network whose edges carry the attributes
          'CANCELLED', 'net_profit', 'Proportion of Seats filled', 'citypair' and 'Demand_prop'
    '''
    # create another column to preprocess the data and create new data table attributes
    df['net_profit'] = ( df['revenue'] * 1000 - df['total_operating_expense'] ) * df['Proportion of Seats filled']
    # extract route demand metric
    # NOTE(review): a row with seats == 0 yields inf/NaN here - confirm the table has none
    df['Demand_prop'] = df['passengers'] / df['seats']

    # drop additional columns that we wont need for simulation
    edge_attrs = ['CANCELLED', 'net_profit', 'Proportion of Seats filled', 'citypair', 'Demand_prop']
    edge = df.filter(['Origin_Airport_Code', 'Destination_Airport_Code'] + edge_attrs, axis=1)

    # Build the graph as a DiGraph from the start. Building an undirected graph and
    # converting it afterwards makes the A->B and B->A rows overwrite each other, so
    # both directions end up sharing the attributes (including 'citypair') of
    # whichever row was read last.
    G = nx.from_pandas_edgelist(df=edge, source='Origin_Airport_Code', target='Destination_Airport_Code',
                                edge_attr=edge_attrs, create_using=nx.DiGraph)

    # Routes present in only one direction are mirrored with the same attributes.
    # This keeps the edge set identical to the previous undirected -> directed
    # conversion, so no airport becomes a dead end for the random walk.
    missing_reverse = [(v, u, dict(attrs)) for u, v, attrs in G.edges(data=True) if not G.has_edge(v, u)]
    G.add_edges_from(missing_reverse)
    return G
# build the point-to-point airline network from the loaded edge table
p_2_p_Graph = preprocess_data(df=edge_dataset)

In [49]:
# Let's check if there are any leafs in the Graph: nodes with no incoming edges or
# no outgoing edges. A random walk cannot leave a node without outgoing edges, so
# such nodes would break the simulation and must be ruled out first.
# An empty list means no such node exists.
print([x for x in p_2_p_Graph.nodes() if p_2_p_Graph.in_degree(x)==0])

print([x for x in p_2_p_Graph.nodes() if p_2_p_Graph.out_degree(x)==0])

# edge -> 'net_profit' lookup (despite its name, `color` holds profits)
color=nx.get_edge_attributes(p_2_p_Graph,'net_profit')
# spot check of one edge; not the last expression of the cell, so the value is not displayed
color[('EWR','PHX')]

# Prototype of a single demand-weighted routing step out of EWR
adj_nodes = list(p_2_p_Graph.neighbors('EWR'))
demand=nx.get_edge_attributes(p_2_p_Graph,'Demand_prop')
# demand value of every edge leaving EWR
node_demand_probs = [demand[i] for i in list(p_2_p_Graph.edges('EWR'))] 
# normalise the demand values so they sum to 1 and can be used as probabilities
node_demand_probs = node_demand_probs / np.sum(node_demand_probs)
# draw the index of the next airport according to those probabilities
next_ind = np.random.choice(np.arange(len(node_demand_probs)),size=1, p=node_demand_probs.astype(float))
next_node = adj_nodes[next_ind[0]]


[]
[]
['AUS', 'BNA', 'DEN', 'FLL', 'MCO', 'MDW', 'OAK', 'PHX', 'SAN', 'STL']


'STL'

In [41]:
# scratch check: np.arange(k) yields the integers 0..k-1 (presumably inspected because
# the routing step passes np.arange(len(...)) as the candidates of np.random.choice)
np.arange(9)

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [27]:
# Scratch check of the cancellation draw for a single route (PHX -> EWR)
cancel_prob=nx.get_edge_attributes(p_2_p_Graph,'CANCELLED')
# the edge's 'CANCELLED' value is used as the Bernoulli success probability
code = cancel_prob[('PHX','EWR')]
# one Bernoulli draw: 0 or 1
r = bernoulli.rvs(p=code, size=1)[0]
r

0

$P_{\text{net, adjusted}} = (\text{Revenue} - \text{Operating Expense}) \times \text{Proportion of Seats Filled}$

In [52]:
# random sampling from KDE distributions
def random_samples(citypair,metric):
    '''
    Goal: draw one random value of a metric for a route from a kernel density
          estimate (KDE) of the observed values, truncated to the range [0, max observed]
    Parameters:
        citypair: value matched against the 'citypair' column of the module-level on_time_data
        metric: (string) name of the on_time_data column to sample, e.g. 'DEP_DELAY_NEW'
    Return:
        numpy array of shape (1,) holding the sampled value
    Raises:
        ValueError: if the route has no finite observation of the metric
    '''
    # filter data given citypair key and metric argument
    fltrd = on_time_data.loc[on_time_data['citypair'] == citypair]
    x = fltrd[metric].to_numpy(dtype=float)
    # missing values would turn the whole KDE into NaN, so drop them first
    x = x[np.isfinite(x)]
    if x.size == 0:
        raise ValueError(f"no finite '{metric}' observations for citypair {citypair!r}")

    x_max = float(np.max(x))
    # A KDE needs at least two distinct observations, and the truncated support
    # [0, x_max] must have positive width. Otherwise (single row, constant values
    # such as all-zero delays, or nothing above zero) the distribution is a point
    # mass: return it directly, clipped to the non-negative support.
    if x.size < 2 or float(np.min(x)) == x_max or x_max <= 0:
        return np.array([max(x_max, 0.0)])

    # Compute the KDE
    kde = gaussian_kde(x, bw_method='silverman')
    # Evaluate it only on the truncated support x >= 0
    x_vals = np.linspace(0, x_max, 1000)
    pdf_vals = kde(x_vals)
    # Cumulative trapezoid integral, starting at exactly 0 ...
    increments = 0.5 * (pdf_vals[1:] + pdf_vals[:-1]) * np.diff(x_vals)
    cdf_vals = np.concatenate(([0.0], np.cumsum(increments)))
    # ... and normalised to end at exactly 1, so every uniform draw maps inside the support
    cdf_vals /= cdf_vals[-1]
    # Generate the random sample using inverse transform sampling
    u = np.random.uniform(size=1)
    sample = np.interp(u, cdf_vals, x_vals)
    return sample


# define a function to simulate a flight traversal for a single aircraft
def simulate_flight_traversal(G, start_node, N, verbose=True):
    '''
    Goal: simulate the flight traversal for a single aircraft starting at a given start airport
    Parameters:
        G: The airline network graph (networkx graph object); its edges must carry the
           attributes 'net_profit', 'CANCELLED', 'citypair' and 'Demand_prop'
        start_node: (string) the starting airport code
        N: number of traversals (edges) to make in the network
        verbose: (bool) print every airport the aircraft flies to (default True)
    Return:
        tuple of four values:
            total profit generated by this aircraft
            total departure delay accumulated
            total arrival delay accumulated
            total number of cancellations
    Raises:
        ValueError: if the aircraft reaches an airport without outgoing routes
    '''
    current_node = start_node

    # extract each attribute of the graph as a dict keyed by (origin, destination)
    profits=nx.get_edge_attributes(G,'net_profit')
    cancel_prob=nx.get_edge_attributes(G,'CANCELLED')
    citypair=nx.get_edge_attributes(G,'citypair')
    # Read the demand from G (the graph being simulated), not from the notebook-level
    # p_2_p_Graph, and build the lookup once instead of on every leg.
    demand=nx.get_edge_attributes(G,'Demand_prop')

    total_profits = []
    total_dept_delays = []
    total_arrival_delays = []
    number_of_cancel = 0

    for _ in range(N):
        # get all adjacent nodes of the current node
        adj_nodes = list(G.neighbors(current_node))
        if not adj_nodes:
            raise ValueError(f"airport {current_node!r} has no outgoing routes; cannot continue the traversal")

        # randomly select a flight route given the transition probability:
        # the demand of each outgoing route, normalised to sum to 1
        node_demand_probs = np.array([demand[(current_node, nxt)] for nxt in adj_nodes], dtype=float)
        node_demand_probs = node_demand_probs / np.sum(node_demand_probs)
        next_ind = np.random.choice(np.arange(len(adj_nodes)), size=1, p=node_demand_probs)
        next_node = adj_nodes[next_ind[0]]
        route = (current_node, next_node)

        # calculate the profit of the flight route
        total_profits.append(profits[route])

        # randomly simulate the delay times from the route's delay distributions
        code = citypair[route]
        dept_delay_time = random_samples(code,'DEP_DELAY_NEW')[0]
        arr_delay_time = random_samples(code, 'ARR_DELAY_NEW')[0]
        total_dept_delays.append(dept_delay_time)
        total_arrival_delays.append(arr_delay_time)

        # bernoulli draw (0 or 1) using the route's 'CANCELLED' value as probability
        # NOTE(review): a cancelled leg still contributes its profit and delays and the
        # aircraft still moves on - confirm this is the intended model
        r = bernoulli.rvs(p=cancel_prob[route], size=1)[0]
        number_of_cancel += r

        if verbose:
            print(next_node)
        # move to the next node
        current_node = next_node

    # return the totals accumulated over the whole traversal
    return np.sum(total_profits), np.sum(total_dept_delays), np.sum(total_arrival_delays), number_of_cancel

In [53]:
# Simulate one aircraft making 20 flights starting from EWR.
# x = total profit, y = total departure delay, z = total arrival delay, c = number of cancellations
x,y,z,c = simulate_flight_traversal(G=p_2_p_Graph, start_node='EWR', N=20)

DEN
PIT
DEN
SEA
DAL
SMF
STL
SAT
OAK
MSP
ATL
DEN
SJC
BUR
SMF
LAX
TUS
SJC
AUS
OAK
