# Freight Forwarding Graph Import

<img src="img/logistics-diagram.png" alt="summary" width="1000"/>

In [65]:
import pandas as pd

## Connect to Graph Data Science

In [67]:
import os

from graphdatascience import GraphDataScience

# Connection settings are read from the environment so that real credentials
# never have to be committed with the notebook. The fallbacks reproduce the
# local development setup this notebook was originally written against.
NEO4J_URI = os.environ.get('NEO4J_URI', 'neo4j://localhost')
NEO4J_USERNAME = os.environ.get('NEO4J_USERNAME', 'neo4j')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD', 'neo')

# `gds` is the client used by every later cell to run Cypher.
gds = GraphDataScience(NEO4J_URI, auth=(NEO4J_USERNAME, NEO4J_PASSWORD))

## Staging for ETL
1. Clear the graph of any existing data and indexes
2. Read the source data
3. Create helper functions
4. Set Neo4j indexes and constraints


In [68]:
# Clear last graph - All data and schema attributes
# WARNING: destructive. Every node (and, via DETACH, every relationship) in the
# connected database is deleted.
gds.run_cypher('MATCH(n) DETACH DELETE n')
# Asserting an empty schema is used here to remove the existing indexes and
# constraints (needs the APOC plugin on the server — TODO confirm it is installed).
# Being the last expression, its result table becomes the cell output.
gds.run_cypher('CALL apoc.schema.assert({},{})')

Unnamed: 0,label,key,keys,unique,action


In [69]:
# Load the shipment table. dtype=str keeps every column as text; later cells
# compare cells against the '?' placeholder and convert to integers in Cypher.
df = pd.read_csv(
    'https://s-cube-network.eu/c2k-files/c2k_data_comma.csv',
    dtype=str,
)

In [70]:
# Inbound leg numbers; they select the i1_*, i2_* and i3_* column families.
LEGS = list(range(1, 4))
# Segment numbers inside one leg; they select the dep_1..3 / rcf_1..3 columns.
SEGMENTS = list(range(1, 4))
# Column-name prefixes for the two directions (presumably 'i' = inbound and
# 'o' = outbound — not referenced by the cells visible here).
GOINGS = ["i", "o"]
# Service codes appearing in column names such as i1_rcs_p / o_dlv_e
# (not referenced by the cells visible here).
SERVICES = ["rcs", "dlv"]

In [71]:
# Helper Functions
def _last_rcf_place(row, prefix):
    """Return the `rcf` place of the highest-numbered segment that has one.

    Segments are probed in the order 3, 2, 1 and the first usable value wins.

    Args:
        row: mapping-like record (e.g. a DataFrame row) holding the
            ``{prefix}_rcf_{1,2,3}_place`` entries.
        prefix: column family to read, e.g. ``'i1'`` or ``'o'``.

    Returns:
        The stored place value, unchanged.

    Raises:
        ValueError: if all three segments are missing.
    """
    for s in (3, 2, 1):
        place = row[f'{prefix}_rcf_{s}_place']
        # '?' is the placeholder for "no such segment"; an empty cell is read
        # as NaN, which must not be mistaken for a place either.
        if place != '?' and not pd.isna(place):
            return place
    # ValueError is still an Exception, so existing `except Exception` callers
    # keep working; the message is unchanged.
    raise ValueError("cannot find last rcf place")


def get_last_i1_rcf_place(row):
    """Return the last `rcf` place recorded for inbound leg 1 of `row`.

    Raises:
        ValueError: if none of the three i1 segments has a place.
    """
    return _last_rcf_place(row, 'i1')


def get_last_outbound_rcf_place(row):
    """Return the last `rcf` place recorded for the outbound leg of `row`.

    Raises:
        ValueError: if none of the three outbound segments has a place.
    """
    return _last_rcf_place(row, 'o')

def get_path_indexes(row):
    """Build the "<start>-<end>" path labels for one shipment row.

    The end is the last outbound rcf place; the start is the first departure
    place of each inbound leg. Legs whose first departure place is '?' are
    skipped.

    Returns:
        dict with one ``'i<leg>'`` entry per usable inbound leg (a string) and
        an ``'o'`` entry listing all of those strings in leg order.
    """
    indexes = {'o': []}
    destination = str(get_last_outbound_rcf_place(row))
    for leg in LEGS:
        origin = str(row[f'i{leg}_dep_1_place'])
        if origin == '?':
            continue
        label = origin + '-' + destination
        indexes[f'i{leg}'] = label
        indexes['o'].append(label)
    return indexes

In [72]:
# Add the derived columns, in this order, by applying each helper row by row.
# NOTE(review): 'last_i_rcf_place' is computed from the i1_* columns only.
derived_columns = {
    'last_o_rcf_place': get_last_outbound_rcf_place,  # last outbound rcf place
    'last_i_rcf_place': get_last_i1_rcf_place,        # last rcf place of inbound leg 1
    'path_indexes': get_path_indexes,                 # "<start>-<end>" labels per leg
}
for column_name, row_function in derived_columns.items():
    df[column_name] = df.apply(row_function, axis=1)
df

Unnamed: 0,nr,i1_legid,i1_rcs_p,i1_rcs_e,i1_dep_1_p,i1_dep_1_e,i1_dep_1_place,i1_rcf_1_p,i1_rcf_1_e,i1_rcf_1_place,...,o_rcf_3_p,o_rcf_3_e,o_rcf_3_place,o_dlv_p,o_dlv_e,o_hops,legs,last_o_rcf_place,last_i_rcf_place,path_indexes
0,0,5182,199,218,210,215,609,935,736,256,...,?,?,?,780,434,1,2,411,256,"{'o': ['609-411', '431-411'], 'i1': '609-411',..."
1,1,6523,844,584,90,297,700,1935,1415,431,...,?,?,?,3870,445,1,2,256,431,"{'o': ['700-256', '257-256'], 'i1': '700-256',..."
2,2,5878,4380,4119,90,280,456,905,547,700,...,?,?,?,550,1520,1,1,349,700,"{'o': ['456-349'], 'i1': '456-349'}"
3,3,1275,759,169,240,777,173,340,577,349,...,?,?,?,3780,159,1,1,700,671,"{'o': ['173-700'], 'i1': '173-700'}"
4,4,8117,1597,1485,150,241,411,585,612,128,...,?,?,?,4140,4797,2,1,411,166,"{'o': ['411-411'], 'i1': '411-411'}"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3938,3939,4225,122,19,240,278,815,830,761,174,...,?,?,?,1665,1300,2,3,293,174,"{'o': ['815-293', '671-293', '671-293'], 'i1':..."
3939,3940,14017,2028,413,270,1825,605,2610,2535,349,...,?,?,?,3780,807,2,1,113,597,"{'o': ['605-113'], 'i1': '605-113'}"
3940,3941,4660,1356,178,240,1359,815,760,716,609,...,?,?,?,5100,4381,2,1,737,609,"{'o': ['815-737'], 'i1': '815-737'}"
3941,3942,6472,2692,1856,90,867,700,1060,1049,113,...,?,?,?,3780,945,2,2,635,113,"{'o': ['700-635', '456-635'], 'i1': '700-635',..."


In [73]:
# Schema setup: one uniqueness constraint per node label and two indexes per
# relationship type. All constraint and index names are unchanged.
#
# Constraints use the `FOR ... REQUIRE` form: the older `ON ... ASSERT` form is
# deprecated in Neo4j 4.4 and rejected by Neo4j 5. (Needs Neo4j 4.4 or newer.)

# constraint name -> node label; every label is unique on `airportId`.
NODE_CONSTRAINTS = {
    'airport_unique': 'Airport',
    'airport_entry_unique': 'EntryPoint',
    'airport_departure_checkpoint_unique': 'DepartureWarehouse',
    'airport_departure_unique': 'DeparturePoint',
    'airport_arrival_unique': 'ArrivalWarehouse',
    'airport_destination_unique': 'Destination',
}

# index-name prefix -> relationship type; each type gets `<prefix>_shipment_id`
# on `shipmentId` and `<prefix>_path` on `pathIndex`.
RELATIONSHIP_INDEXES = {
    'check_in': 'FREIGHT_RECEPTION',
    'confirm': 'FREIGHT_DEPARTURE',
    'depart': 'FREIGHT_TRANSPORT',
    'connect': 'CONNECT',
    'deliver': 'FREIGHT_DELIVERY',
    'transfer': 'TRANSFER',
}

for constraint_name, label in NODE_CONSTRAINTS.items():
    gds.run_cypher(
        f'CREATE CONSTRAINT {constraint_name} IF NOT EXISTS '
        f'FOR (n:{label}) REQUIRE n.airportId IS UNIQUE'
    )

for prefix, rel_type in RELATIONSHIP_INDEXES.items():
    gds.run_cypher(
        f'CREATE INDEX {prefix}_shipment_id IF NOT EXISTS '
        f'FOR ()-[r:{rel_type}]-() ON (r.shipmentId)'
    )
    gds.run_cypher(
        f'CREATE INDEX {prefix}_path IF NOT EXISTS '
        f'FOR ()-[r:{rel_type}]-() ON (r.pathIndex)'
    )

## Ingest Nodes
Nodes will consist of Airports and freight forwarding checkpoints

In [74]:
# Gather every airport id mentioned in a column whose name contains 'place',
# then create the Airport node plus its five checkpoint nodes, each linked to
# the Airport with LOCATED_AT.
cols = df.columns.tolist()

airport_ids = set()
for col in cols:
    if 'place' not in col:
        continue
    # Keep only real ids: drop empty cells and the '?' placeholder.
    is_known = df[col].notna() & (df[col] != '?')
    airport_ids.update(int(place) for place in df.loc[is_known, col].unique().tolist())

gds.run_cypher('''
    UNWIND $airportIds AS airportId
    MERGE(n0:Airport {airportId: airportId})
    MERGE(n1:EntryPoint {airportId: airportId})
    MERGE(n2:DepartureWarehouse{airportId: airportId})
    MERGE(n3:DeparturePoint {airportId: airportId})
    MERGE(n4:ArrivalWarehouse {airportId: airportId})
    MERGE(n5:Destination {airportId: airportId})

    MERGE(n0)<-[r1:LOCATED_AT]-(n1)
    MERGE(n0)<-[r2:LOCATED_AT]-(n2)
    MERGE(n0)<-[r3:LOCATED_AT]-(n3)
    MERGE(n0)<-[r4:LOCATED_AT]-(n4)
    MERGE(n0)<-[r5:LOCATED_AT]-(n5)
    RETURN count(n0), count(n1), count(n2), count(n3), count(n4), count(n5)
    ''', params=dict(airportIds=list(airport_ids)))

Unnamed: 0,count(n0),count(n1),count(n2),count(n3),count(n4),count(n5)
0,237,237,237,237,237,237


## Ingest Relationships
Relationships will consist of the freight forwarding steps, as well as links representing connections between transportation segments and the transfer between the inbound and outbound stages of the shipment

In [75]:
# RCS, inbound: one (:EntryPoint)-[:FREIGHT_RECEPTION]->(:DepartureWarehouse)
# relationship per shipment and inbound leg, at the leg's first departure airport.
for l in LEGS:
    leg_id_col = f'i{l}_legid'
    # Rows that actually have this leg: id neither empty nor the '?' placeholder.
    has_leg = df[leg_id_col].notna() & (df[leg_id_col] != '?')
    wanted_cols = ['nr', leg_id_col, f'i{l}_rcs_p', f'i{l}_rcs_e', f'i{l}_dep_1_place', 'path_indexes']
    sub_dict = df.loc[has_leg, wanted_cols].to_dict(orient='records')
    res = gds.run_cypher(f'''
        UNWIND $relMaps AS relMap
        WITH toInteger(relMap.nr) AS shipmentId,
            toInteger(relMap.i{l}_dep_1_place) AS airportId,
            toInteger(relMap.i{l}_legid) AS legId,
            toInteger(relMap.i{l}_rcs_e) AS effectiveMinutes,
            toInteger(relMap.i{l}_rcs_p) AS plannedMinutes,
            relMap.path_indexes.i{l} AS pathIndex
        MATCH(n1:EntryPoint {{airportId: airportId}})
        MATCH(n2:DepartureWarehouse {{airportId: airportId}})
        MERGE(n1)-[r:FREIGHT_RECEPTION {{shipmentId: shipmentId, legId: legId, legNumber: $legNumber}}]->(n2)
        ON CREATE SET r.plannedMinutes=plannedMinutes,
        r.effectiveMinutes=effectiveMinutes,
        r.pathIndex = pathIndex
        RETURN count(r)
    ''', params=dict(relMaps=sub_dict, legNumber=l))
    print(res)

   count(r)
0      3942
   count(r)
0      2624
   count(r)
0      1366


In [76]:
# RCS, outbound: same relationship as the inbound cell, but for the single
# outbound leg, which is stored with legNumber -1 and the list-valued
# path_indexes.o as pathIndex.
has_outbound_leg = df['o_legid'].notna() & (df['o_legid'] != '?')
outbound_cols = ['nr', 'o_legid', 'o_rcs_p', 'o_rcs_e', 'o_dep_1_place', 'path_indexes']
sub_dict = df.loc[has_outbound_leg, outbound_cols].to_dict(orient='records')
gds.run_cypher('''
    UNWIND $relMaps AS relMap
    WITH toInteger(relMap.nr) AS shipmentId,
        toInteger(relMap.o_dep_1_place) AS airportId,
        toInteger(relMap.o_legid) AS legId,
        toInteger(relMap.o_rcs_e) AS effectiveMinutes,
        toInteger(relMap.o_rcs_p) AS plannedMinutes,
        relMap.path_indexes.o AS pathIndex
    MATCH(n1:EntryPoint {airportId: airportId})
    MATCH(n2:DepartureWarehouse {airportId: airportId})
    MERGE(n1)-[r:FREIGHT_RECEPTION {shipmentId: shipmentId, legId: legId, legNumber: -1}]->(n2)
    ON CREATE SET r.plannedMinutes=plannedMinutes,
        r.effectiveMinutes=effectiveMinutes,
        r.pathIndex = pathIndex
    RETURN count(r)
''', params=dict(relMaps=sub_dict))

Unnamed: 0,count(r)
0,3942


In [77]:
# DEP, inbound: (:DepartureWarehouse)-[:FREIGHT_DEPARTURE]->(:DeparturePoint)
# for every inbound leg and every segment (1-3) that has a departure place.
for l in LEGS:
    print(f'== LEG {l} ======================')
    for s in SEGMENTS:
        print(f'-- SEGMENT {s} ----------------------')
        place_col = f'i{l}_dep_{s}_place'
        leg_id_col = f'i{l}_legid'
        # '?' in the place column means the leg has no such segment.
        uses_segment = (df[place_col] != '?') & df[leg_id_col].notna()
        wanted_cols = ['nr', leg_id_col, f'i{l}_dep_{s}_p', f'i{l}_dep_{s}_e', place_col, 'path_indexes']
        sub_dict = df.loc[uses_segment, wanted_cols].to_dict(orient='records')
        res = gds.run_cypher(f'''
            UNWIND $relMaps AS relMap
            WITH toInteger(relMap.nr) AS shipmentId,
                toInteger(relMap.i{l}_dep_{s}_place) AS airportId,
                toInteger(relMap.i{l}_legid) AS legId,
                toInteger(relMap.i{l}_dep_{s}_e) AS effectiveMinutes,
                toInteger(relMap.i{l}_dep_{s}_p) AS plannedMinutes,
                relMap.path_indexes.i{l} AS pathIndex
            MATCH(n1:DepartureWarehouse {{airportId: airportId}})
            MATCH(n2:DeparturePoint {{airportId: airportId}})
            MERGE(n1)-[r:FREIGHT_DEPARTURE {{shipmentId: shipmentId, legId: legId, legNumber: $legNumber, segmentNumber: $segmentNumber}}]->(n2)
            ON CREATE SET r.plannedMinutes=plannedMinutes,
                r.effectiveMinutes=effectiveMinutes,
                r.pathIndex = pathIndex
            RETURN count(r)
        ''', params=dict(relMaps=sub_dict, legNumber=l, segmentNumber=s))
        print(res)

-- SEGMENT 1 ----------------------
   count(r)
0      3942
-- SEGMENT 2 ----------------------
   count(r)
0      1195
-- SEGMENT 3 ----------------------
   count(r)
0        23
-- SEGMENT 1 ----------------------
   count(r)
0      2624
-- SEGMENT 2 ----------------------
   count(r)
0       791
-- SEGMENT 3 ----------------------
   count(r)
0        14
-- SEGMENT 1 ----------------------
   count(r)
0      1366
-- SEGMENT 2 ----------------------
   count(r)
0       391
-- SEGMENT 3 ----------------------
   count(r)
0         8


In [78]:
# DEP, outbound: (:DepartureWarehouse)-[:FREIGHT_DEPARTURE]->(:DeparturePoint)
# for every outbound segment (1-3) that has a departure place; legNumber is -1.
for s in SEGMENTS:
    print(f'-- SEGMENT {s} ----------------------')
    place_col = f'o_dep_{s}_place'
    # '?' in the place column means the outbound leg has no such segment.
    uses_segment = (df[place_col] != '?') & df['o_legid'].notna()
    wanted_cols = ['nr', 'o_legid', f'o_dep_{s}_p', f'o_dep_{s}_e', place_col, 'path_indexes']
    sub_dict = df.loc[uses_segment, wanted_cols].to_dict(orient='records')
    res = gds.run_cypher(f'''
        UNWIND $relMaps AS relMap
        WITH toInteger(relMap.nr) AS shipmentId,
            toInteger(relMap.o_dep_{s}_place) AS airportId,
            toInteger(relMap.o_legid) AS legId,
            toInteger(relMap.o_dep_{s}_e) AS effectiveMinutes,
            toInteger(relMap.o_dep_{s}_p) AS plannedMinutes,
            relMap.path_indexes.o AS pathIndex
        MATCH(n1:DepartureWarehouse {{airportId: airportId}})
        MATCH(n2:DeparturePoint {{airportId: airportId}})
        MERGE(n1)-[r:FREIGHT_DEPARTURE {{shipmentId: shipmentId, legId: legId, legNumber: $legNumber, segmentNumber: $segmentNumber}}]->(n2)
        ON CREATE SET r.plannedMinutes=plannedMinutes,
            r.effectiveMinutes=effectiveMinutes,
            r.pathIndex = pathIndex
        RETURN count(r)
    ''', params=dict(relMaps=sub_dict, legNumber=-1, segmentNumber=s))
    print(res)

-- SEGMENT 1 ----------------------
   count(r)
0      3942
-- SEGMENT 2 ----------------------
   count(r)
0      1845
-- SEGMENT 3 ----------------------
   count(r)
0        26


In [79]:
# RCF, inbound: (:DeparturePoint)-[:FREIGHT_TRANSPORT]->(:ArrivalWarehouse),
# from the segment's departure airport to its rcf airport, for every inbound
# leg and segment that has an rcf place.
for l in LEGS:
    print(f'== LEG {l} ======================')
    for s in SEGMENTS:
        print(f'-- SEGMENT {s} ----------------------')
        arrival_col = f'i{l}_rcf_{s}_place'
        leg_id_col = f'i{l}_legid'
        # Rows are selected on the arrival place only; the departure place is
        # read as-is in the query.
        uses_segment = (df[arrival_col] != '?') & df[leg_id_col].notna()
        wanted_cols = ['nr', leg_id_col, f'i{l}_rcf_{s}_p', f'i{l}_rcf_{s}_e',
                       f'i{l}_dep_{s}_place', arrival_col, 'path_indexes']
        sub_dict = df.loc[uses_segment, wanted_cols].to_dict(orient='records')
        res = gds.run_cypher(f'''
            UNWIND $relMaps AS relMap
            WITH toInteger(relMap.nr) AS shipmentId,
                toInteger(relMap.i{l}_dep_{s}_place) AS fromAirportId,
                toInteger(relMap.i{l}_rcf_{s}_place) AS toAirportId,
                toInteger(relMap.i{l}_legid) AS legId,
                toInteger(relMap.i{l}_rcf_{s}_e) AS effectiveMinutes,
                toInteger(relMap.i{l}_rcf_{s}_p) AS plannedMinutes,
                relMap.path_indexes.i{l} AS pathIndex
            MATCH(n1:DeparturePoint {{airportId: fromAirportId}})
            MATCH(n2:ArrivalWarehouse {{airportId: toAirportId}})
            MERGE(n1)-[r:FREIGHT_TRANSPORT {{shipmentId: shipmentId, legId: legId, legNumber: $legNumber, segmentNumber: $segmentNumber}}]->(n2)
            ON CREATE SET r.plannedMinutes=plannedMinutes,
                r.effectiveMinutes=effectiveMinutes,
                r.pathIndex = pathIndex
            RETURN count(r)
        ''', params=dict(relMaps=sub_dict, legNumber=l, segmentNumber=s))
        print(res)

-- SEGMENT 1 ----------------------
   count(r)
0      3942
-- SEGMENT 2 ----------------------
   count(r)
0      1195
-- SEGMENT 3 ----------------------
   count(r)
0        23
-- SEGMENT 1 ----------------------
   count(r)
0      2624
-- SEGMENT 2 ----------------------
   count(r)
0       791
-- SEGMENT 3 ----------------------
   count(r)
0        14
-- SEGMENT 1 ----------------------
   count(r)
0      1366
-- SEGMENT 2 ----------------------
   count(r)
0       391
-- SEGMENT 3 ----------------------
   count(r)
0         8


In [80]:
# RCF, outbound: (:DeparturePoint)-[:FREIGHT_TRANSPORT]->(:ArrivalWarehouse)
# for every outbound segment that has an rcf place; legNumber is -1.
for s in SEGMENTS:
    print(f'-- SEGMENT {s} ----------------------')
    arrival_col = f'o_rcf_{s}_place'
    # Rows are selected on the arrival place only; the departure place is read
    # as-is in the query.
    uses_segment = (df[arrival_col] != '?') & df['o_legid'].notna()
    wanted_cols = ['nr', 'o_legid', f'o_rcf_{s}_p', f'o_rcf_{s}_e',
                   f'o_dep_{s}_place', arrival_col, 'path_indexes']
    sub_dict = df.loc[uses_segment, wanted_cols].to_dict(orient='records')
    res = gds.run_cypher(f'''
        UNWIND $relMaps AS relMap
        WITH toInteger(relMap.nr) AS shipmentId,
            toInteger(relMap.o_dep_{s}_place) AS fromAirportId,
            toInteger(relMap.o_rcf_{s}_place) AS toAirportId,
            toInteger(relMap.o_legid) AS legId,
            toInteger(relMap.o_rcf_{s}_e) AS effectiveMinutes,
            toInteger(relMap.o_rcf_{s}_p) AS plannedMinutes,
            relMap.path_indexes.o AS pathIndex
        MATCH(n1:DeparturePoint {{airportId: fromAirportId}})
        MATCH(n2:ArrivalWarehouse {{airportId: toAirportId}})
        MERGE(n1)-[r:FREIGHT_TRANSPORT {{shipmentId: shipmentId, legId: legId, legNumber: $legNumber, segmentNumber: $segmentNumber}}]->(n2)
        ON CREATE SET r.plannedMinutes=plannedMinutes,
            r.effectiveMinutes=effectiveMinutes,
            r.pathIndex = pathIndex
        RETURN count(r)
    ''', params=dict(relMaps=sub_dict, legNumber=-1, segmentNumber=s))
    print(res)

-- SEGMENT 1 ----------------------
   count(r)
0      3942
-- SEGMENT 2 ----------------------
   count(r)
0      1845
-- SEGMENT 3 ----------------------
   count(r)
0        26


In [81]:
# DLV, inbound: (:ArrivalWarehouse)-[:FREIGHT_DELIVERY]->(:Destination) at the
# airport stored in last_i_rcf_place, once per inbound leg.
# NOTE(review): last_i_rcf_place is derived from the i1_rcf_* columns, yet it is
# used for legs 2 and 3 as well — confirm every inbound leg of a shipment ends
# at the same airport.
for l in LEGS:
    leg_id_col = f'i{l}_legid'
    # Rows that actually have this leg: id neither empty nor the '?' placeholder.
    has_leg = df[leg_id_col].notna() & (df[leg_id_col] != '?')
    wanted_cols = ['nr', leg_id_col, f'i{l}_dlv_p', f'i{l}_dlv_e', 'last_i_rcf_place', 'path_indexes']
    sub_dict = df.loc[has_leg, wanted_cols].to_dict(orient='records')
    res = gds.run_cypher(f'''
        UNWIND $relMaps AS relMap
        WITH toInteger(relMap.nr) AS shipmentId,
            toInteger(relMap.last_i_rcf_place) AS airportId,
            toInteger(relMap.i{l}_legid) AS legId,
            toInteger(relMap.i{l}_dlv_e) AS effectiveMinutes,
            toInteger(relMap.i{l}_dlv_p) AS plannedMinutes,
            relMap.path_indexes.i{l} AS pathIndex
        MATCH(n1:ArrivalWarehouse {{airportId: airportId}})
        MATCH(n2:Destination {{airportId: airportId}})
        MERGE(n1)-[r:FREIGHT_DELIVERY {{shipmentId: shipmentId, legId: legId, legNumber: $legNumber}}]->(n2)
        ON CREATE SET r.plannedMinutes=plannedMinutes,
            r.effectiveMinutes=effectiveMinutes,
            r.pathIndex = pathIndex
        RETURN count(r)
    ''', params=dict(relMaps=sub_dict, legNumber=l))
    print(res)

   count(r)
0      3942
   count(r)
0      2624
   count(r)
0      1366


In [82]:
# DLV, outbound: (:ArrivalWarehouse)-[:FREIGHT_DELIVERY]->(:Destination) at the
# last outbound rcf airport; legNumber is -1.
has_outbound_leg = df['o_legid'].notna() & (df['o_legid'] != '?')
outbound_cols = ['nr', 'o_legid', 'o_dlv_p', 'o_dlv_e', 'last_o_rcf_place', 'path_indexes']
sub_dict = df.loc[has_outbound_leg, outbound_cols].to_dict(orient='records')
res = gds.run_cypher('''
    UNWIND $relMaps AS relMap
    WITH toInteger(relMap.nr) AS shipmentId,
        toInteger(relMap.last_o_rcf_place) AS airportId,
        toInteger(relMap.o_legid) AS legId,
        toInteger(relMap.o_dlv_e) AS effectiveMinutes,
        toInteger(relMap.o_dlv_p) AS plannedMinutes,
        relMap.path_indexes.o AS pathIndex
    MATCH(n1:ArrivalWarehouse {airportId: airportId})
    MATCH(n2:Destination {airportId: airportId})
    MERGE(n1)-[r:FREIGHT_DELIVERY {shipmentId: shipmentId, legId: legId, legNumber: $legNumber}]->(n2)
    ON CREATE SET r.plannedMinutes=plannedMinutes,
        r.effectiveMinutes=effectiveMinutes,
        r.pathIndex = pathIndex
    RETURN count(r)
''', params=dict(relMaps=sub_dict, legNumber=-1))
print(res)

   count(r)
0      3942


In [83]:
# Inspect the rows that feed the TRANSFER step for one inbound leg.
# The leg is named explicitly here instead of reusing whatever value an earlier
# cell's `for l in LEGS` loop happened to leave behind, so the cell gives the
# same result regardless of which cells ran before it. LEGS[-1] is the last
# leg, i.e. the value the leaked loop variable used to hold.
inspect_leg = LEGS[-1]
inspect_leg_col = f'i{inspect_leg}_legid'
df.loc[(df[inspect_leg_col] != '?') & (df[inspect_leg_col].notna()),
       ['nr', inspect_leg_col, 'last_i_rcf_place', 'path_indexes']]

Unnamed: 0,nr,i3_legid,last_i_rcf_place,path_indexes
6,6,571,391,"{'o': ['815-281', '815-281', '610-281'], 'i1':..."
12,12,9401,815,"{'o': ['281-485', '281-485', '614-485'], 'i1':..."
14,14,11408,349,"{'o': ['456-149', '671-149', '468-149'], 'i1':..."
22,22,8092,815,"{'o': ['332-403', '614-403', '349-403'], 'i1':..."
24,24,14188,391,"{'o': ['815-783', '628-783', '233-783'], 'i1':..."
...,...,...,...,...
3926,3927,13056,286,"{'o': ['166-261', '815-261', '671-261'], 'i1':..."
3932,3933,1291,339,"{'o': ['556-108', '556-108', '556-108'], 'i1':..."
3934,3935,4129,609,"{'o': ['815-243', '366-243', '815-243'], 'i1':..."
3937,3938,3991,243,"{'o': ['233-127', '233-127', '815-127'], 'i1':..."


In [84]:
# TRANSFER: (:Destination)-[:TRANSFER]->(:EntryPoint) at the airport stored in
# last_i_rcf_place, once per inbound leg. Both minute properties are set to 0
# and pathIndex is the list held in path_indexes.o.
# NOTE(review): last_i_rcf_place is derived from the i1_rcf_* columns but is
# used for every leg — confirm all inbound legs end at the same airport.
for l in LEGS:
    leg_id_col = f'i{l}_legid'
    # Rows that actually have this leg: id neither empty nor the '?' placeholder.
    has_leg = df[leg_id_col].notna() & (df[leg_id_col] != '?')
    wanted_cols = ['nr', leg_id_col, 'last_i_rcf_place', 'path_indexes']
    sub_dict = df.loc[has_leg, wanted_cols].to_dict(orient='records')
    res = gds.run_cypher(f'''
        UNWIND $relMaps AS relMap
        WITH toInteger(relMap.nr) AS shipmentId,
            toInteger(relMap.last_i_rcf_place) AS airportId,
            toInteger(relMap.i{l}_legid) AS legId,
            relMap.path_indexes.o AS pathIndex
        MATCH(n1:Destination {{airportId: airportId}})
        MATCH(n2:EntryPoint {{airportId: airportId}})
        MERGE(n1)-[r:TRANSFER {{shipmentId: shipmentId, legId: legId, legNumber: $legNumber}}]->(n2)
        ON CREATE SET r.plannedMinutes=0, r.effectiveMinutes=0, r.pathIndex = pathIndex
        RETURN count(r)
    ''', params=dict(relMaps=sub_dict, legNumber=l))
    print(res)

   count(r)
0      3942
   count(r)
0      2624
   count(r)
0      1366


In [85]:
# CONNECT, inbound: (:ArrivalWarehouse)-[:CONNECT]->(:DepartureWarehouse) at the
# departure airport of segments 2 and 3, i.e. where a leg continues on a
# further segment. Both minute properties are set to 0.
# NOTE(review): the MERGE key carries no segment number, so two connections of
# the same leg at the same airport would share one relationship.
for l in LEGS:
    print(f'== LEG {l} ======================')
    for s in [2,3]:
        print(f'-- SEGMENT {s} ----------------------')
        leg_id_col = f'i{l}_legid'
        # A segment exists when its rcf place is not the '?' placeholder.
        uses_segment = (df[f'i{l}_rcf_{s}_place'] != '?') & df[leg_id_col].notna()
        wanted_cols = ['nr', leg_id_col, f'i{l}_dep_{s}_place', 'path_indexes']
        sub_dict = df.loc[uses_segment, wanted_cols].to_dict(orient='records')
        res = gds.run_cypher(f'''
            UNWIND $relMaps AS relMap
            WITH toInteger(relMap.nr) AS shipmentId,
                toInteger(relMap.i{l}_dep_{s}_place) AS airportId,
                toInteger(relMap.i{l}_legid) AS legId,
                relMap.path_indexes.i{l} AS pathIndex
            MATCH(n1:ArrivalWarehouse {{airportId: airportId}})
            MATCH(n2:DepartureWarehouse {{airportId: airportId}})
            MERGE(n1)-[r:CONNECT {{shipmentId: shipmentId, legId: legId, legNumber: $legNumber}}]->(n2)
            ON CREATE SET r.plannedMinutes=0, r.effectiveMinutes=0, r.pathIndex = pathIndex
            RETURN count(r)
        ''', params=dict(relMaps=sub_dict, legNumber=l))
        print(res)

-- SEGMENT 2 ----------------------
   count(r)
0      1195
-- SEGMENT 3 ----------------------
   count(r)
0        23
-- SEGMENT 2 ----------------------
   count(r)
0       791
-- SEGMENT 3 ----------------------
   count(r)
0        14
-- SEGMENT 2 ----------------------
   count(r)
0       391
-- SEGMENT 3 ----------------------
   count(r)
0         8


In [86]:
# CONNECT, outbound: (:ArrivalWarehouse)-[:CONNECT]->(:DepartureWarehouse) at
# the departure airport of outbound segments 2 and 3; legNumber is -1 and both
# minute properties are set to 0.
# NOTE(review): the MERGE key carries no segment number, so two connections of
# the outbound leg at the same airport would share one relationship.
for s in [2,3]:
    print(f'-- SEGMENT {s} ----------------------')
    # A segment exists when its rcf place is not the '?' placeholder.
    uses_segment = (df[f'o_rcf_{s}_place'] != '?') & df['o_legid'].notna()
    wanted_cols = ['nr', 'o_legid', f'o_dep_{s}_place', 'path_indexes']
    sub_dict = df.loc[uses_segment, wanted_cols].to_dict(orient='records')
    res = gds.run_cypher(f'''
            UNWIND $relMaps AS relMap
            WITH toInteger(relMap.nr) AS shipmentId,
                toInteger(relMap.o_dep_{s}_place) AS airportId,
                toInteger(relMap.o_legid) AS legId,
                relMap.path_indexes.o AS pathIndex
            MATCH(n1:ArrivalWarehouse {{airportId: airportId}})
            MATCH(n2:DepartureWarehouse {{airportId: airportId}})
            MERGE(n1)-[r:CONNECT {{shipmentId: shipmentId, legId: legId, legNumber: $legNumber}}]->(n2)
            ON CREATE SET r.plannedMinutes=0, r.effectiveMinutes=0, r.pathIndex=pathIndex
            RETURN count(r)
    ''', params=dict(relMaps=sub_dict, legNumber=-1))
    print(res)

-- SEGMENT 2 ----------------------
   count(r)
0      1845
-- SEGMENT 3 ----------------------
   count(r)
0        26
