# Route Finding What-If Scenarios
Effect of High-Centrality Airport Delays on Freight-Forwarding Performance

In [None]:
import pandas as pd
import numpy as np
import itertools

In [None]:
from graphdatascience import GraphDataScience

# Use Neo4j URI and credentials according to your setup.
# NOTE(review): the URI points at a local instance and the credentials are hard-coded
# in the notebook; replace them for your environment and avoid committing real secrets.
gds = GraphDataScience('neo4j://localhost', auth=('neo4j', 'neo'))

## Get All Historic Routes and Calculate Shortest Paths Based on Historic Data

In [None]:
def clear_all_graphs():
    """Drop every graph returned by ``gds.graph.list()``.

    Each listed graph name is resolved to a graph object with
    ``gds.graph.get`` and then passed to ``gds.graph.drop``.
    """
    catalog = gds.graph.list()
    for name in catalog.graphName.tolist():
        gds.graph.drop(gds.graph.get(name))

In [None]:
# Cypher query supplying the nodes of the in-memory projection: every node that
# carries at least one of the six listed labels, returned as (id, labels).
NODE_PROJ_QUERY = '''
        MATCH(n)
        WHERE n:EntryPoint OR n:DepartureWarehouse OR n:DeparturePoint OR n:ArrivalWarehouse OR n:TransferPoint OR n:Destination
        RETURN id(n) as id, labels(n) as labels
        '''
# Cypher query supplying the relationships of the projection. Because avg() is an
# aggregation, rows are grouped by (source, target, type), so each such triple gets
# one row whose averageEffectiveMinutes is the mean of r.effectiveMinutes.
REL_PROJ_QUERY = '''
        MATCH(n0)-[r:RECEPTION|DEPARTURE|TRANSPORT|DELIVERY]->(n1)
        RETURN id(n0) AS source, id(n1) AS target, type(r) AS type, avg(r.effectiveMinutes) AS averageEffectiveMinutes
        '''

In [None]:
# Drop all existing in-memory graphs first, then build the projection named 'proj'
# from the node and relationship Cypher queries.
clear_all_graphs()
g, _ = gds.graph.project.cypher('proj', NODE_PROJ_QUERY, REL_PROJ_QUERY)
# Display the second value returned by the projection call.
_

In [None]:
# get all shipments
# Build the table of historic routes.
# For every RECEPTION leaving an EntryPoint, find the DELIVERY with the same
# shipmentId arriving at a Destination, keep only pairs whose airportIds differ,
# and aggregate per (source node, target node, source airport, target airport):
# the list of shipment ids and their count, ordered by count descending.
route_df = gds.run_cypher('''
    MATCH (n:EntryPoint)-[r:RECEPTION]->()
    WITH r.shipmentId AS shipmentId, id(n) AS sourceNodeId, n.airportId AS sourceAirportId
    MATCH (n:Destination)<-[r:DELIVERY {shipmentId : shipmentId}]-()
    WHERE n.airportId <> sourceAirportId
    RETURN sourceNodeId, id(n) AS targetNodeId, sourceAirportId, n.airportId AS targetAirportId, collect(shipmentId) AS shipmentIds, count(*) AS shipmentCount
    ORDER BY shipmentCount DESC
''')
route_df

In [None]:
def get_airport_ids(path_df):
    """Return the set of ``airportId`` values found on any node of any path.

    ``path_df`` must have a ``path`` column whose values expose a ``nodes``
    iterable; each node is queried with ``.get('airportId')``, so nodes
    without that property contribute ``None``.
    """
    return {
        node.get('airportId')
        for _, record in path_df.iterrows()
        for node in record.path.nodes
    }

def get_best_path_airport_ids(path_df):
    """Return the set of ``airportId`` values on the minimum-cost path(s).

    Only rows whose ``totalCost`` equals the column minimum are considered;
    ties are all included. Each node of those rows' ``path`` is queried with
    ``.get('airportId')``.
    """
    lowest_cost = path_df.totalCost.min()
    cheapest = path_df[path_df.totalCost == lowest_cost]
    return {
        node.get('airportId')
        for _, record in cheapest.iterrows()
        for node in record.path.nodes
    }

In [None]:
%%time
# NOTE(review): `time` is not referenced in the visible code; the cell is timed by %%time.
import time
# Yen's k-shortest-paths results per route, keyed by (sourceAirportId, targetAirportId).
path_dfs={}
for ind, row in route_df.iterrows():
    # Stream up to k=20 paths between the route's source and target nodes on the
    # projection g, weighted by the averageEffectiveMinutes relationship property.
    path_df = gds.shortestPath.yens.stream(g, sourceNode=row.sourceNodeId, targetNode=row.targetNodeId,
                                           k=20, relationshipWeightProperty='averageEffectiveMinutes')
    # NOTE(review): the search uses node ids but the key uses airport ids; if two
    # routes share the same airport pair, the later result overwrites the earlier
    # one. Confirm that each airport pair maps to exactly one node pair.
    path_dfs[(row.sourceAirportId, row.targetAirportId)] = path_df

## Calculate Recommended Freight Forwarding Solutions for Historic Shipments

In [None]:
# Reshape routes into one row per (shipment id, target node, target airport):
# explode the per-route shipment id lists (the column keeps the name 'shipmentIds'
# but then holds a single id per row), then collect each shipment's source node ids
# and source airport ids into lists.
shipment_df = route_df.explode('shipmentIds').groupby(['shipmentIds', 'targetNodeId', 'targetAirportId'])\
    .agg({'sourceNodeId':list, 'sourceAirportId':list, }).reset_index()
shipment_df

In [None]:
def get_solution(row, path_dfs, multiplier_airport_id=None, multiplier=3.0):
    """Evaluate one combination of candidate paths (one path per source).

    Args:
        row: Object with a ``solutionIndex`` attribute; element ``n`` is the row
            label of the path chosen from ``path_dfs[n]``.
        path_dfs: Sequence of path DataFrames with ``totalCost`` and ``path``
            columns. Each path exposes ``relationships`` whose items provide
            ``start_node``/``end_node`` (with ``.id`` and ``.get('airportId')``)
            and ``.get('cost')``.
        multiplier_airport_id: Airport whose relationships are delayed, or
            ``None`` for no delay.
        multiplier: Factor applied to the cost of every relationship whose end
            node belongs to ``multiplier_airport_id``.

    Returns:
        Tuple ``(total_cost, path_costs, relationships, airport_ids)`` where
        ``total_cost`` sums each distinct relationship once (relationships
        shared by several chosen paths are de-duplicated by start/end node id),
        ``path_costs`` lists the ``totalCost`` of each chosen path,
        ``relationships`` is the de-duplicated relationship list, and
        ``airport_ids`` is the set of end-node airport ids.
    """
    rels = {}
    path_costs = []
    airport_ids = set()
    for n, path_index in enumerate(row.solutionIndex):
        path_df = path_dfs[n]
        path_costs.append(path_df.totalCost[path_index])
        for r in path_df.path[path_index].relationships:
            # Key on the node pair so a leg shared by several paths is counted once.
            rels[f'{r.start_node.id}-{r.end_node.id}'] = r
            airport_ids.add(r.end_node.get('airportId'))

    def leg_cost(r):
        # Decide the multiplier per relationship. Previously a single value left
        # over from the last relationship visited was applied to every leg.
        delayed = (multiplier_airport_id is not None
                   and r.end_node.get('airportId') == multiplier_airport_id)
        return r.get('cost') * (multiplier if delayed else 1.0)

    total_cost = sum(leg_cost(r) for r in rels.values())
    return total_cost, path_costs, list(rels.values()), airport_ids


def best_k_solutions(path_df_dict, source_airport_ids, target_airport_id, top_k=10, multiplier_airport_id=None, multiplier=3.0):
    """Rank every combination of candidate paths and return the cheapest ``top_k``.

    One candidate-path DataFrame is looked up (and copied) per source airport
    under the key ``(source_airport_id, target_airport_id)``. The Cartesian
    product of their row positions enumerates all combinations; each is scored
    with ``get_solution`` and the result is sorted ascending by ``totalCost``.

    Returns a DataFrame with columns ``solutionIndex``, ``totalCost``,
    ``pathCosts``, ``relationships`` and ``airportIds`` and a fresh 0-based index.
    """
    path_df_list = [
        path_df_dict[(source_airport_id, target_airport_id)].copy()
        for source_airport_id in source_airport_ids
    ]

    # One range of row positions per source; their product is every combination.
    index_ranges = [range(candidates.shape[0]) for candidates in path_df_list]
    combinations = pd.DataFrame(itertools.product(*index_ranges))
    solution_df = combinations.apply(tuple, axis=1).to_frame(name='solutionIndex')

    scored = solution_df.apply(
        get_solution,
        args=(path_df_list, multiplier_airport_id, multiplier),
        axis=1,
        result_type='expand',
    )
    solution_df[['totalCost','pathCosts', 'relationships', 'airportIds']] = scored

    ranked = solution_df.sort_values('totalCost')
    return ranked[:top_k].reset_index(drop=True)

def best_solution(path_df_dict, source_airport_ids, target_airport_id, multiplier_airport_id=None, multiplier=3.0):
    """Return ``(totalCost, airportIds)`` of the single cheapest path combination.

    Delegates to ``best_k_solutions`` with ``top_k=1`` and reads the first row.
    """
    top = best_k_solutions(
        path_df_dict,
        source_airport_ids,
        target_airport_id,
        top_k=1,
        multiplier_airport_id=multiplier_airport_id,
        multiplier=multiplier,
    )
    cheapest_cost = top.loc[0,'totalCost']
    cheapest_airports = top.loc[0,'airportIds']
    return cheapest_cost, cheapest_airports

def baseline_solution(row, path_df_dict):
    """Return the best ``(cost, airport ids)`` for a shipment row with no delay applied.

    ``row`` supplies ``sourceAirportId`` (the source airports) and
    ``targetAirportId``; the default multiplier arguments of ``best_solution``
    are used.
    """
    sources = row.sourceAirportId
    destination = row.targetAirportId
    return best_solution(path_df_dict, sources, destination)

def top_solution(row, path_df_dict, multiplier_airport_id, multiplier=3.0):
    """Return the best achievable cost for a shipment when one airport is delayed.

    If the delayed airport is among the row's ``baselineAirportIds``, the best
    solution is recomputed with the multiplier applied; otherwise the stored
    ``baselineCost`` is returned unchanged.
    """
    if multiplier_airport_id in row.baselineAirportIds:
        delayed = best_solution(path_df_dict, row.sourceAirportId, row.targetAirportId,
                                multiplier_airport_id, multiplier)
        return delayed[0]
    # The baseline solution does not touch the delayed airport.
    return row.baselineCost

Taking a sub-sample will cut down on computation time

In [None]:
# Draw at most 1000 shipments at random. The size is capped at the number of
# available rows because DataFrame.sample raises ValueError when n exceeds the
# population (sampling is without replacement by default).
shipment_df_sample = shipment_df.sample(n=min(1000, len(shipment_df)))

In [None]:
%%time
# For each sampled shipment compute the best solution with no delay applied.
# baseline_solution returns a (cost, airport ids) pair, which result_type='expand'
# spreads over the two new columns.
shipment_df_sample[['baselineCost', 'baselineAirportIds']] = \
    shipment_df_sample.apply(baseline_solution, axis=1, args=[path_dfs], result_type='expand')

In [None]:
# Display the sample, which now carries the baselineCost and baselineAirportIds columns.
shipment_df_sample

## What-If Scenarios
For each airport that was used as a transfer point, simulate a 3x delay in processing time and re-calculate the best freight-forwarding combinations.

In [None]:
# Distinct airportId values of TransferPoint nodes that have an outgoing RECEPTION
# relationship, as a Python list; these are the airports tested in the what-if runs.
airport_ids = gds.run_cypher('''
    MATCH (n:TransferPoint)-[r:RECEPTION]->()
    RETURN DISTINCT n.airportId as airportId
''')['airportId'].tolist()

In [None]:
%%time
# One what-if run per transfer airport: re-evaluate every sampled shipment with a
# 3.0x multiplier on that airport and store the cost in 'cost_<airportId>_delay'.
# cnt ends up holding the number of airports processed.
cnt = 0
for cnt, airport_id in enumerate(airport_ids, start=1):
    shipment_df_sample[f'cost_{airport_id}_delay'] = shipment_df_sample.apply(
        top_solution,
        axis=1,
        args=(path_dfs, airport_id, 3.0),
        result_type='expand',
    )

In [None]:
# Display the sample, now including one cost_<airportId>_delay column per tested airport.
shipment_df_sample

## Aggregate Total Delay Time and Number of Delays Then Compare to Centrality Metrics

In [None]:
# Sum of baseline costs over the sample; the reference for all increases below.
total_baseline = sum(shipment_df_sample['baselineCost'])
total_delays = []
for airport_id in airport_ids:
    # Per-shipment cost when this airport is delayed.
    delay_costs = shipment_df_sample[f'cost_{airport_id}_delay']
    total_increase = sum(delay_costs) - total_baseline
    total_delays.append({'airportId': airport_id,
                         'totalTimePercIncrease': total_increase/total_baseline,
                         'totalTimeIncrease': total_increase,
                         'numberOfDelayedShipments': sum(delay_costs > shipment_df_sample['baselineCost']),
                         # Use this airport's own delay column; the original read the
                         # hard-coded 'cost_140_delay' column for every airport.
                         'maxDelayTime': max(delay_costs - shipment_df_sample['baselineCost'])})
total_delay_df = pd.DataFrame(total_delays)
total_delay_df

In [None]:
# Derive an airport-to-airport relationship from the TRANSPORT relationships:
# count TRANSPORT relationships between a DeparturePoint and an ArrivalWarehouse per
# (departure airport, arrival airport) pair, MERGE a SENDS_TO relationship between
# the two airports, and store the count on it as flightCount.
gds.run_cypher('''
    MATCH(a1:Airport)<-[:LOCATED_AT]-(d1:DeparturePoint)-[r:TRANSPORT]->(d2:ArrivalWarehouse)-[:LOCATED_AT]->(a2:Airport)
    WITH a1, a2, count(r) AS flightCount
    MERGE (a1)-[s:SENDS_TO]->(a2)
    SET s.flightCount = flightCount
    RETURN count(s)
''')

In [None]:
# Drop the graph currently bound to g before projecting a new graph under the name 'proj'
g.drop()
# Create the in-memory graph projection: Airport nodes and SENDS_TO relationships
# carrying the flightCount property
g, _ = gds.graph.project('proj', 'Airport', {'SENDS_TO': {'properties': ['flightCount']}})
# calculate and write out-degree centrality, weighted by flightCount
gds.degree.write(g, relationshipWeightProperty='flightCount', writeProperty='outDegreeCentrality')
# calculate and write betweenness centrality (no weight property is passed)
gds.betweenness.write(g, writeProperty='betweennessCentrality')
# calculate and write eigenvector centrality, weighted by flightCount
gds.eigenvector.write(g, relationshipWeightProperty='flightCount', writeProperty='eigenvectorCentrality')
# drop the projected in-memory graph
g.drop()
# Calculate in-degree centrality by running degree centrality on a projection
# whose SENDS_TO relationships use the REVERSE orientation
g, _ = gds.graph.project('proj', 'Airport', {'SENDS_TO': {'orientation': 'REVERSE', 'properties': ['flightCount']}})
gds.degree.write(g, relationshipWeightProperty='flightCount', writeProperty='inDegreeCentrality')
# drop the reversed projection as well
g.drop()

In [None]:
# Read each Airport's id, name and the four centrality properties
# (in-degree, out-degree, betweenness, eigenvector) into a DataFrame.
airport_df = gds.run_cypher('''
    MATCH(a:Airport)
    RETURN a.airportId as airportId,
        a.name AS name,
        a.inDegreeCentrality AS inDegreeCentrality,
        a.outDegreeCentrality AS outDegreeCentrality,
        a.betweennessCentrality AS betweennessCentrality,
        a.eigenvectorCentrality AS eigenvectorCentrality
''')
airport_df

In [None]:
# Join the delay metrics with the centrality scores on airportId. merge defaults to
# an inner join, so only airports present in both frames are kept.
df = total_delay_df.merge(airport_df, on='airportId')

In [None]:
# Pairwise correlation matrix between the four delay metrics and the four
# centrality scores (DataFrame.corr with its default method).
df[['totalTimeIncrease', 'totalTimePercIncrease', 'numberOfDelayedShipments', 'maxDelayTime',
    'betweennessCentrality', 'eigenvectorCentrality', 'inDegreeCentrality', 'outDegreeCentrality']].corr()

In [None]:
# Show airports ordered by the number of delayed shipments, largest first.
df.sort_values('numberOfDelayedShipments', ascending=False)