In [1]:
import os
from getpass import getpass

import emblaze
import numpy as np
import pandas as pd
from graphdatascience import GraphDataScience

In [2]:
# Connection settings are read from the environment so that credentials are
# not stored in the notebook. URI, user and database fall back to the local
# development values; the password is prompted for when NEO4J_PASSWORD is unset.
neo4j_uri = os.environ.get("NEO4J_URI", "bolt://localhost:11003")
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")
neo4j_database = os.environ.get("NEO4J_DATABASE", "neo4j")

gds = GraphDataScience(
    neo4j_uri,
    auth=(neo4j_user, neo4j_password),
    database=neo4j_database,
)

# Ask the server for its GDS version once and fail early if nothing comes back.
gds_version = gds.version()
print(gds_version)
assert gds_version, "GDS library did not report a version"
using_enterprise = gds.is_licensed()

2.0.3


In [3]:
# Derive the relationship/node properties that the later projection reads.

# weight = maxDistance + 1 - distance: inverts distance so that the longest
# route gets weight 1 and shorter routes get larger weights.
gds.run_cypher("""
match (:Airport)-[r:HAS_ROUTE]->(:Airport)
with collect(r) as routes, max(r.distance) as maxDistance
foreach(route in routes | set route.weight = maxDistance + 1 - route.distance)
""")
# Copy the x component of the `location` property into a plain `longitude` property.
gds.run_cypher("""
match (n:Airport)
with collect(n) as airports
foreach(airport in airports | set airport.longitude = airport.location.x)
""")
# Copy the y component of the `location` property into a plain `latitude` property.
gds.run_cypher("""
match (n:Airport)
with collect(n) as airports
foreach(airport in airports | set airport.latitude = airport.location.y)
""")
# NOTE(review): despite its name, avgPageRank stores the SUM of the two endpoint
# pagerank values, not their average.
# NOTE(review): assumes every Airport already has a `pagerank` property; it is
# not computed anywhere in the cells visible here - confirm before Run All.
gds.run_cypher("""
match (p:Airport)-[r:HAS_ROUTE]->(q:Airport)
SET r.avgPageRank = p.pagerank + q.pagerank
""")
# weightedRank = maxRank + 1 - avgPageRank: routes between high-pagerank
# airports get the smallest value.
gds.run_cypher("""
match (:Airport)-[r:HAS_ROUTE]->(:Airport)
with collect(r) as routes, max(r.avgPageRank) as maxRank
foreach(route in routes | set route.weightedRank = maxRank + 1 - route.avgPageRank)
""")

In [4]:
# Project all airports and their routes as an undirected in-memory graph.
# Parallel relationships are collapsed with MAX aggregation of each property.
G_routes, result = gds.graph.project(
    "air-routes",
    "Airport",
    {"HAS_ROUTE": {"orientation": "UNDIRECTED", "aggregation": "MAX"}},
    relationshipProperties=["weight", "distance", "weightedRank"],
    nodeProperties=["latitude", "longitude"],
)

# Summary statistics of the weakly connected components (displayed in the next cell).
routes_wcc = gds.wcc.stats(G_routes)

# Label every node with its component id in the projection, then write the
# label back to the database so it can be queried with Cypher.
gds.wcc.mutate(G_routes, mutateProperty="componentId")
gds.graph.writeNodeProperties(G_routes, ["componentId"])

# Look up the id of the largest component instead of assuming it is 0:
# the ids assigned by WCC are not guaranteed to put the biggest component first.
largest_component = gds.run_cypher(
    "MATCH (a:Airport) RETURN a.componentId as componentId, count(*) as nodeCount "
    "ORDER BY count(*) DESC limit 1"
)
assert not largest_component.empty, "no Airport nodes carry a componentId"
largest_component_id = int(largest_component.loc[0, "componentId"])

# Keep only the airports that belong to the largest connected component.
G_connected_airports, result = gds.beta.graph.project.subgraph(
    "connected-airports",
    G_routes,
    f"n.componentId = {largest_component_id}",
    "*",
)

In [5]:
# Show the component-size distribution returned by gds.wcc.stats.
routes_wcc['componentDistribution']


{'p99': 1,
 'min': 1,
 'max': 3292,
 'mean': 16.52358490566038,
 'p90': 1,
 'p50': 1,
 'p999': 3292,
 'p95': 1,
 'p75': 1}

In [4]:
def train_fast_rp(graph, config):
    """Run FastRP in mutate mode on ``graph`` and return the call's result.

    Args:
        graph: The projected graph object passed through to ``gds.fastRP.mutate``.
        config: Dict of extra FastRP keyword arguments (for example
            ``mutateProperty``, ``iterationWeights``). Values given here take
            precedence over the defaults below.

    Returns:
        Whatever ``gds.fastRP.mutate`` returns for this run.
    """
    # Notebook-wide defaults. Merging rather than passing them as separate
    # keywords lets a config override them without a duplicate-keyword TypeError.
    settings = {"embeddingDimension": 64, "randomSeed": 45}
    settings.update(config)
    return gds.fastRP.mutate(graph, **settings)

In [5]:
# One FastRP configuration per (weighting, depth) pair: the three depths without
# relationship weights first, then the same three using the "weight" property.
# The mutate property name is "<depth><Weighted|Unweighted>".
configs = [
    {
        "iterationWeights": list(weights),
        **weighting,
        "mutateProperty": depth + suffix,
    }
    for suffix, weighting in (
        ("Unweighted", {}),
        ("Weighted", {"relationshipWeightProperty": "weight"}),
    )
    for depth, weights in (
        ("shallow", (1.0, 1.0)),
        ("medium", (0.0, 1.0, 1.0)),
        ("deep", (1.0, 1.0, 1.0, 1.0)),
    )
]

In [7]:
# Run FastRP once per configuration on the connected-airports graph and keep
# each call's result, in the same order as `configs`.
# NOTE(review): each run mutates the projection with a new node property, so
# re-running this cell without re-projecting may fail - confirm.
embedding_results = [train_fast_rp(G_connected_airports, config) for config in configs]

In [8]:
# Tabulate the per-configuration FastRP results, one row per run.
pd.DataFrame(embedding_results)

Unnamed: 0,nodePropertiesWritten,mutateMillis,nodeCount,preProcessingMillis,computeMillis,configuration
0,3292,0,3292,1,47,"{'nodeSelfInfluence': 0, 'relationshipWeightPr..."
0,3292,0,3292,0,19,"{'nodeSelfInfluence': 0, 'relationshipWeightPr..."
0,3292,0,3292,0,21,"{'nodeSelfInfluence': 0, 'relationshipWeightPr..."
0,3292,0,3292,1,30,"{'nodeSelfInfluence': 0, 'relationshipWeightPr..."
0,3292,0,3292,0,23,"{'nodeSelfInfluence': 0, 'relationshipWeightPr..."
0,3292,0,3292,0,29,"{'nodeSelfInfluence': 0, 'relationshipWeightPr..."


In [9]:
# Stream the six FastRP properties from the "connected-airports" projection and
# pivot them into one row per airport (one column per embedding variant), joined
# with the airport's continent. Rows are ordered by the number of HAS_ROUTE
# relationships (either direction), descending, and cut off at 900 airports.
# The property names listed here must match the `mutateProperty` values in `configs`.
embedding_df = gds.run_cypher("""
    call gds.graph.streamNodeProperties("connected-airports", 
        ["shallowUnweighted", 
         "mediumUnweighted", 
         "deepUnweighted", 
         "shallowWeighted", 
         "mediumWeighted", 
         "deepWeighted"]) 
    yield nodeId, nodeProperty, propertyValue
    WITH gds.util.asNode(nodeId) as a,
    MAX(case when nodeProperty = "shallowUnweighted" then       
           propertyValue end) as shallowUnweighted,
    MAX(case when nodeProperty = "mediumUnweighted" then 
           propertyValue end) as mediumUnweighted,
    MAX(case when nodeProperty = "deepUnweighted" then 
           propertyValue end) as deepUnweighted,
    MAX(case when nodeProperty = "shallowWeighted" then 
           propertyValue end) as shallowWeighted,
    MAX(case when nodeProperty = "mediumWeighted" then 
           propertyValue end) as mediumWeighted,
    MAX(case when nodeProperty = "deepWeighted" then 
           propertyValue end) as deepWeighted
    MATCH (a)-[:ON_CONTINENT]->(c:Continent)
    RETURN
    a.descr as airport_name, 
    a.iata as airport_code, 
    c.name as continent,
    shallowUnweighted,
    mediumUnweighted,
    deepUnweighted,
    shallowWeighted,
    mediumWeighted,
    deepWeighted
    ORDER BY size([(a)-[:HAS_ROUTE]-() | a]) DESC
    LIMIT 900
    """)

In [10]:
# Display the embeddings table built in the previous cell.
embedding_df

Unnamed: 0,airport_name,airport_code,continent,shallowUnweighted,mediumUnweighted,deepUnweighted,shallowWeighted,mediumWeighted,deepWeighted
0,Frankfurt am Main,FRA,EU,"[-0.07399057596921921, 0.2620871663093567, 0.1...","[-0.11316736042499542, 0.24514061212539673, 0....","[-0.15192122757434845, 0.5060424208641052, 0.2...","[-0.19411295652389526, 0.2675624191761017, 0.2...","[-0.1977788209915161, 0.2508077621459961, 0.13...","[-0.36135566234588623, 0.5160863399505615, 0.3..."
1,Paris Charles de Gaulle,CDG,EU,"[-0.06564060598611832, 0.055160876363515854, 0...","[-0.11409448832273483, 0.21923694014549255, 0....","[-0.15566448867321014, 0.28849947452545166, 0....","[-0.15881961584091187, 0.0621316060423851, 0.2...","[-0.20260721445083618, 0.22906702756881714, 0....","[-0.3372170031070709, 0.30104851722717285, 0.3..."
2,Istanbul International Airport,IST,EU,"[0.026787318289279938, 0.2438107430934906, 0.1...","[-0.025942979380488396, 0.2735888957977295, 0....","[-0.0012168977409601212, 0.5254808664321899, 0...","[-0.0003005862236022949, 0.26281487941741943, ...","[-0.06701691448688507, 0.2827916741371155, 0.1...","[-0.07971426844596863, 0.5542945861816406, 0.3..."
3,Amsterdam Airport Schiphol,AMS,EU,"[-0.1855502426624298, 0.16803550720214844, 0.1...","[-0.16270774602890015, 0.2214188426733017, 0.1...","[-0.297057569026947, 0.40379035472869873, 0.24...","[-0.28153276443481445, 0.19925662875175476, 0....","[-0.2414441704750061, 0.22703152894973755, 0.1...","[-0.47922539710998535, 0.43863940238952637, 0...."
4,Munich International Airport,MUC,EU,"[-0.20580337941646576, 0.11519283056259155, 0....","[-0.16538169980049133, 0.2382025420665741, 0.1...","[-0.3348800837993622, 0.3535888195037842, 0.32...","[-0.29063165187835693, 0.13292333483695984, 0....","[-0.233033686876297, 0.24436715245246887, 0.15...","[-0.4940841495990753, 0.37679851055145264, 0.3..."
...,...,...,...,...,...,...,...,...,...
895,Nanping Wuyishan Airport,WUS,AS,"[-0.15465746819972992, -0.29468223452568054, -...","[-0.16766786575317383, -0.05741892755031586, -...","[-0.28116992115974426, -0.25400516390800476, -...","[-0.16491110622882843, -0.310563325881958, -0....","[-0.19777408242225647, -0.0949367955327034, -0...","[-0.33032694458961487, -0.3239209055900574, -0..."
896,Scott AFB/Midamerica Airport,BLV,,"[0.3646937608718872, 0.18155963718891144, -0.1...","[0.38825953006744385, 0.09397226572036743, -0....","[0.6421867609024048, 0.2155066579580307, -0.12...","[0.3598550856113434, 0.179595947265625, -0.126...","[0.38907772302627563, 0.08189046382904053, -0....","[0.6340279579162598, 0.1816011667251587, -0.09..."
897,Canberra International Airport,CBR,OC,"[0.0862639844417572, 0.5978029370307922, 0.028...","[0.18571466207504272, 0.6202242374420166, 0.07...","[0.1772807538509369, 1.2233781814575195, 0.106...","[0.04642222821712494, 0.528540849685669, 0.041...","[0.12939876317977905, 0.6064460277557373, 0.09...","[0.1165374219417572, 1.1702359914779663, 0.154..."
898,Mangalore International Airport,IXE,AS,"[0.0036581531167030334, 0.37329620122909546, 0...","[0.11747836321592331, 0.337380051612854, 0.131...","[0.08971487730741501, 0.7435741424560547, 0.12...","[-0.029092632234096527, 0.34626173973083496, 0...","[0.06642842292785645, 0.3237442374229431, 0.11...","[0.023804744705557823, 0.7144446969032288, 0.1..."


In [11]:
def create_emblaze_embedding(embedding_df, column):
    """Build an emblaze embedding from one vector column and return its projection.

    Args:
        embedding_df: DataFrame holding the embedding vectors plus a
            'continent' column, which is used as the colour field.
        column: Name of the column whose per-row vectors become the positions;
            it is also used as the embedding's label.

    Returns:
        The result of ``project()`` on the embedding, called after
        ``compute_neighbors()`` (10 neighbours, cosine metric).
    """
    # Stack the per-row vectors into a single array.
    positions = np.array(list(embedding_df[column]))
    fields = {
        emblaze.Field.POSITION: positions,
        emblaze.Field.COLOR: embedding_df['continent'],
    }
    embedding = emblaze.Embedding(
        fields,
        n_neighbors=10,
        label=column,
        metric='cosine',
    )
    embedding.compute_neighbors()
    projected = embedding.project()
    return projected

In [12]:
# Build one projected emblaze embedding per embedding column. columns[3:] skips
# the first three columns (airport_name, airport_code, continent) of embedding_df.
emblaze_embeddings = [create_emblaze_embedding(embedding_df, column)    
   for column in embedding_df.columns[3:]]

In [13]:
# Group the six projected embeddings into a single emblaze EmbeddingSet.
variants = emblaze.EmbeddingSet(emblaze_embeddings)

In [14]:
# Display a summary of the embedding set.
variants

<EmbeddingSet with 6 embeddings:
	<Embedding with 900 items, 2 fields (position, color)>
	<Embedding with 900 items, 2 fields (position, color)>
	<Embedding with 900 items, 2 fields (position, color)>
	<Embedding with 900 items, 2 fields (position, color)>
	<Embedding with 900 items, 2 fields (position, color)>
	<Embedding with 900 items, 2 fields (position, color)>>

In [15]:
# Text label for each point, formatted as "<airport name> (<IATA code>)".
thumbnail_labels = embedding_df['airport_name'] + " (" + embedding_df['airport_code'] + ")"
thumbnails = emblaze.TextThumbnails(thumbnail_labels)

In [16]:
# Create the emblaze viewer widget over the embedding set with the text thumbnails.
w = emblaze.Viewer(embeddings = variants, thumbnails = thumbnails)

In [17]:
# Render the viewer widget.
w

Viewer(colorScheme='tableau', data={'data': [{'_format': 'compressed', '_idtype': 'u2', '_length': 900, 'ids':…

In [18]:
# Unweighted shortest path from the airport with ICAO code KMSP to the one with
# NZWN, following HAS_ROUTE relationships in their stored direction over at
# most 20 hops; returns the nodes on the path.
# The trailing backslash inside this (non-raw) string is a Python line
# continuation: it removes the newline, so MATCH and Return reach the server
# on one line.
rout = gds.run_cypher("""
MATCH p=shortestPath((p1:Airport {icao:'KMSP'})-[:HAS_ROUTE*1..20]->(p3:Airport {icao:'NZWN'})) \
Return nodes(p) as path
""")

In [8]:
# Display the shortest-path query result.
rout

Unnamed: 0,path
0,"[(altitude, componentId, longest, city, latitu..."


In [11]:
def dijkstra_distance(source, destination):
    """Stream the Dijkstra shortest path between two airports, weighted by ``distance``.

    Runs ``gds.shortestPath.dijkstra.stream`` on the 'air-routes' projection.

    Args:
        source: IATA code of the start airport.
        destination: IATA code of the target airport.

    Returns:
        The result of ``gds.run_cypher`` with the columns index, sourceNodeName,
        targetNodeName, totalCost, nodeNames, nodeIATA, costs and path.
    """
    # The IATA codes are passed as query parameters rather than concatenated
    # into the Cypher text, so quotes in the input cannot alter the query.
    query = """
MATCH (p1:Airport {iata: $source}), (p2:Airport {iata: $destination})

CALL gds.shortestPath.dijkstra.stream('air-routes',{
    sourceNode: p1,
    targetNode: p2,
    relationshipWeightProperty: 'distance'
})
YIELD index, sourceNode, targetNode, totalCost, nodeIds, costs, path
RETURN
    index,
    gds.util.asNode(sourceNode).city AS sourceNodeName,
    gds.util.asNode(targetNode).city AS targetNodeName,
    totalCost,
    [nodeId IN nodeIds | gds.util.asNode(nodeId).city] AS nodeNames,
    [nodeId IN nodeIds | gds.util.asNode(nodeId).iata] AS nodeIATA,
    costs,
    nodes(path) as path
ORDER BY index
"""
    return gds.run_cypher(query, params={"source": source, "destination": destination})

In [15]:
def astar_distance(source, destination):
    """Stream the A* shortest path between two airports, weighted by ``distance``.

    Runs ``gds.shortestPath.astar.stream`` on the 'air-routes' projection, using
    the 'latitude' and 'longitude' node properties as the coordinate properties.

    Args:
        source: IATA code of the start airport.
        destination: IATA code of the target airport.

    Returns:
        The result of ``gds.run_cypher`` with the columns index, sourceNodeName,
        targetNodeName, totalCost, nodeNames, nodeIATA, costs and path.
    """
    # The IATA codes are passed as query parameters rather than concatenated
    # into the Cypher text, so quotes in the input cannot alter the query.
    query = """
MATCH (p1:Airport {iata: $source}), (p2:Airport {iata: $destination})

CALL gds.shortestPath.astar.stream('air-routes',{
    sourceNode: p1,
    targetNode: p2,
    latitudeProperty: 'latitude',
    longitudeProperty: 'longitude',
    relationshipWeightProperty: 'distance'
})
YIELD index, sourceNode, targetNode, totalCost, nodeIds, costs, path
RETURN
    index,
    gds.util.asNode(sourceNode).city AS sourceNodeName,
    gds.util.asNode(targetNode).city AS targetNodeName,
    totalCost,
    [nodeId IN nodeIds | gds.util.asNode(nodeId).city] AS nodeNames,
    [nodeId IN nodeIds | gds.util.asNode(nodeId).iata] AS nodeIATA,
    costs,
    nodes(path) as path
ORDER BY index
"""
    return gds.run_cypher(query, params={"source": source, "destination": destination})

In [21]:
def dijkstra_rank(source, destination):
    """Stream the Dijkstra shortest path between two airports, weighted by ``weightedRank``.

    Runs ``gds.shortestPath.dijkstra.stream`` on the 'air-routes' projection,
    using the 'weightedRank' relationship property as the cost.

    Args:
        source: IATA code of the start airport.
        destination: IATA code of the target airport.

    Returns:
        The result of ``gds.run_cypher`` with the columns index, sourceNodeName,
        targetNodeName, totalCost, nodeNames, nodeIATA, costs and path.
    """
    # The IATA codes are passed as query parameters rather than concatenated
    # into the Cypher text, so quotes in the input cannot alter the query.
    query = """
MATCH (p1:Airport {iata: $source}), (p2:Airport {iata: $destination})

CALL gds.shortestPath.dijkstra.stream('air-routes',{
    sourceNode: p1,
    targetNode: p2,
    relationshipWeightProperty: 'weightedRank'
})
YIELD index, sourceNode, targetNode, totalCost, nodeIds, costs, path
RETURN
    index,
    gds.util.asNode(sourceNode).city AS sourceNodeName,
    gds.util.asNode(targetNode).city AS targetNodeName,
    totalCost,
    [nodeId IN nodeIds | gds.util.asNode(nodeId).city] AS nodeNames,
    [nodeId IN nodeIds | gds.util.asNode(nodeId).iata] AS nodeIATA,
    costs,
    nodes(path) as path
ORDER BY index
"""
    return gds.run_cypher(query, params={"source": source, "destination": destination})

In [14]:
# Dijkstra shortest path from DEL to HPN using route distance as the cost.
dijkstra_distance("DEL", "HPN")

Unnamed: 0,index,sourceNodeName,targetNodeName,totalCost,nodeNames,nodeIATA,costs,path
0,0,New Delhi,White Plains,7309.0,"[New Delhi, Reykjavik, Boston, White Plains]","[DEL, KEF, BOS, HPN]","[0.0, 4737.0, 7143.0, 7309.0]","[(altitude, longest, componentId, city, latitu..."


In [16]:
# Same DEL -> HPN query with A*, for comparison with the Dijkstra result.
astar_distance("DEL", "HPN")

Unnamed: 0,index,sourceNodeName,targetNodeName,totalCost,nodeNames,nodeIATA,costs,path
0,0,New Delhi,White Plains,7309.0,"[New Delhi, Reykjavik, Boston, White Plains]","[DEL, KEF, BOS, HPN]","[0.0, 4737.0, 7143.0, 7309.0]","[(altitude, longest, componentId, city, latitu..."


In [20]:
# DEL -> HPN again, this time with the weightedRank property as the cost.
dijkstra_rank("DEL", "HPN")

Unnamed: 0,index,sourceNodeName,targetNodeName,totalCost,nodeNames,nodeIATA,costs,path
0,0,New Delhi,White Plains,20.671068,"[New Delhi, Chicago, White Plains]","[DEL, ORD, HPN]","[0.0, 8.620578713845259, 20.671068093627866]","[(altitude, longest, componentId, city, latitu..."


In [22]:
# Clean up: drop both graph projections created by this notebook.
# Only the result of the last call is displayed as the cell output.
G_routes.drop()
G_connected_airports.drop()


graphName                                           connected-airports
database                                                         neo4j
memoryUsage                                                           
sizeInBytes                                                         -1
nodeCount                                                         3292
relationshipCount                                                49476
configuration        {'creationTime': 2022-12-07T10:21:21.247190000...
density                                                       0.004567
creationTime                       2022-12-07T10:21:21.247190000+05:30
modificationTime                   2022-12-07T10:21:21.393133000+05:30
schema               {'relationships': {'HAS_ROUTE': {'distance': '...
Name: 0, dtype: object