In [30]:
#run this line if you are starting in a fresh colab environment
!pip install graphdatascience



In [31]:
import os
from getpass import getpass

from graphdatascience import GraphDataScience

In [32]:
# Connection settings are read from the environment so the password is not
# stored in the notebook. NEO4J_URI and NEO4J_USER fall back to the host and
# user name that were previously hard-coded here; when NEO4J_PASSWORD is not
# set the password is requested interactively.
neo4j_uri = os.environ.get('NEO4J_URI', 'bolt://3.86.239.25:7687')
neo4j_user = os.environ.get('NEO4J_USER', 'neo4j')
neo4j_password = os.environ.get('NEO4J_PASSWORD') or getpass('Neo4j password: ')

gds = GraphDataScience(neo4j_uri, auth=(neo4j_user, neo4j_password))

## Count the nodes

In [33]:
# Count every node in the graph, regardless of label.
node_count_query = 'MATCH (n) RETURN count(n) AS numberOfNodes'
gds.run_cypher(node_count_query)

Unnamed: 0,numberOfNodes
0,8627


## Match relationships (basic paths)

In [34]:
# Fetch ten HAS_ROUTE relationships that start at an Airport node and return
# the start node, the relationship and the end node as whole objects.
sample_routes_query = '''
    MATCH (a1:Airport)-[r:HAS_ROUTE]->(a2)
    RETURN a1, r, a2
    LIMIT 10
'''
gds.run_cypher(sample_routes_query)

Unnamed: 0,a1,r,a2
0,"(descr, altitude, longest, iata, city, icao, l...",(distance),"(altitude, descr, longest, iata, city, icao, l..."
1,"(descr, altitude, longest, iata, city, icao, l...",(distance),"(altitude, descr, longest, iata, city, icao, l..."
2,"(descr, altitude, longest, iata, city, icao, l...",(distance),"(altitude, descr, longest, iata, city, icao, l..."
3,"(descr, altitude, longest, iata, city, icao, l...",(distance),"(altitude, descr, longest, iata, city, icao, l..."
4,"(descr, altitude, longest, iata, city, icao, l...",(distance),"(altitude, descr, longest, iata, city, icao, l..."
5,"(descr, altitude, longest, iata, city, icao, l...",(distance),"(altitude, descr, longest, iata, city, icao, l..."
6,"(descr, altitude, longest, iata, city, icao, l...",(distance),"(descr, altitude, longest, iata, city, icao, l..."
7,"(descr, altitude, longest, iata, city, icao, l...",(distance),"(descr, altitude, longest, iata, city, icao, l..."
8,"(descr, altitude, longest, iata, city, icao, l...",(distance),"(descr, altitude, longest, iata, city, icao, l..."
9,"(descr, altitude, longest, iata, city, icao, l...",(distance),"(altitude, descr, longest, iata, city, icao, l..."


## Select specific properties instead

In [35]:
# Same pattern as the basic path match, but project only the iata codes of
# both endpoints and the distance stored on the relationship.
route_properties_query = '''
    MATCH (a1:Airport)-[r:HAS_ROUTE]->(a2)
    RETURN a1.iata AS originAirport, r.distance AS routeMiles, a2.iata AS destAirport
    LIMIT 10
'''
gds.run_cypher(route_properties_query)

Unnamed: 0,originAirport,routeMiles,destAirport
0,ATL,708,MSN
1,ATL,1936,CCS
2,ATL,606,ORD
3,ATL,1886,SAN
4,ATL,1123,FAR
5,ATL,4782,MUC
6,ATL,670,MKE
7,ATL,1084,MTY
8,ATL,759,OKC
9,ATL,143,AGS


## Match on a given node property

In [36]:
# Routes leaving the airport whose iata code is DFW: keep the ten with the
# largest r.distance, listed from longest to shortest.
dfw_longest_routes_query = '''
    MATCH (a1:Airport)-[r:HAS_ROUTE]->(a2)
    WHERE a1.iata = "DFW"
    RETURN a1.city AS originCity, r.distance AS routeMiles, a2.city AS destCity
    ORDER BY routeMiles DESC
    LIMIT 10
'''
gds.run_cypher(dfw_longest_routes_query)

Unnamed: 0,originCity,routeMiles,destCity
0,Dallas,8574,Sydney
1,Dallas,8105,Hong Kong
2,Dallas,8053,Abu Dhabi
3,Dallas,8022,Dubai
4,Dallas,7914,Doha
5,Dallas,7332,Shanghai
6,Dallas,6951,Beijing
7,Dallas,6822,Seoul
8,Dallas,6410,Tokyo
9,Dallas,5597,Rome


## A More Complicated Match and Filter

In [37]:
#Flight plans from Dallas (DFW) to Sydney (SYD) with exactly one layover,
#keeping only plans whose two legs sum to less than 10K, shortest first
one_layover_query = '''
    MATCH (a1:Airport {iata: "DFW"})-[r1:HAS_ROUTE]->(a2)-[r2:HAS_ROUTE]->(a3:Airport {iata: "SYD"})
    WITH a2.iata AS layoverAirport, a2.city AS layoverCity, r1.distance + r2.distance AS totalMiles
    WHERE totalMiles < 10000
    RETURN layoverAirport, layoverCity, totalMiles
    ORDER BY totalMiles
'''
gds.run_cypher(one_layover_query)

Unnamed: 0,layoverAirport,layoverCity,totalMiles
0,LAX,Los Angeles,8720
1,IAH,Houston,8816
2,HNL,Honolulu,8849
3,SFO,San Francisco,8880
4,YVR,Vancouver,9513


## K-Hop Patterns

In [38]:
#Flight plans from Dallas (DFW) to Sydney (SYD) made of 1 to 3 HAS_ROUTE hops
#(i.e. fewer than 3 layovers), ranked by summed route distance; top 100 only
k_hop_query = '''
    MATCH p=(a1:Airport {iata: "DFW"})-[:HAS_ROUTE*1..3]->(a3:Airport {iata: "SYD"})
    RETURN size( nodes(p)) - 2 AS numberOfLayovers,
        [n in nodes(p) |  n.iata + "-" + n.city] AS citiesTraveled,
        reduce(d = 0, r IN relationships(p) | d + r.distance) AS totalMiles
    ORDER BY totalMiles
    LIMIT 100
'''
gds.run_cypher(k_hop_query)

Unnamed: 0,numberOfLayovers,citiesTraveled,totalMiles
0,0,"[DFW-Dallas, SYD-Sydney]",8574
1,2,"[DFW-Dallas, LAX-Los Angeles, NAN-Nadi, SYD-Sy...",8720
2,1,"[DFW-Dallas, LAX-Los Angeles, SYD-Sydney]",8720
3,2,"[DFW-Dallas, PSP-Palm Springs, LAX-Los Angeles...",8721
4,2,"[DFW-Dallas, PHX-Phoenix, LAX-Los Angeles, SYD...",8724
...,...,...,...
95,2,"[DFW-Dallas, FAT-Fresno, LAX-Los Angeles, SYD-...",9008
96,2,"[DFW-Dallas, LCH-Lake Charles, IAH-Houston, SY...",9013
97,2,"[DFW-Dallas, ICT-Wichita, LAX-Los Angeles, SYD...",9017
98,2,"[DFW-Dallas, AUS-Austin, HNL-Honolulu, SYD-Syd...",9018


In [39]:
#Same as above but with query parameters.
#The `*1..3` hop bound stays a literal in the pattern (Cypher does not take a
#parameter in that position), so $maxPathLength is applied as a filter on
#length(p). 3 therefore remains the hard upper limit; smaller values of
#maxPathLength narrow the result.
gds.run_cypher('''
    MATCH p=(a1:Airport {iata: $originIata})-[:HAS_ROUTE*1..3]->(a3:Airport {iata: $destIata})
    WHERE length(p) <= $maxPathLength
    RETURN size( nodes(p)) - 2 AS numberOfLayovers,
        [n in nodes(p) |  n.iata + "-" + n.city] AS citiesTraveled,
        reduce(d = 0, r IN relationships(p) | d + r.distance) AS totalMiles
    ORDER BY totalMiles
    LIMIT 100
''', params= {
    'originIata': 'DFW',
    'destIata': 'SYD',
    'maxPathLength': 3
})

Unnamed: 0,numberOfLayovers,citiesTraveled,totalMiles
0,0,"[DFW-Dallas, SYD-Sydney]",8574
1,2,"[DFW-Dallas, LAX-Los Angeles, NAN-Nadi, SYD-Sy...",8720
2,1,"[DFW-Dallas, LAX-Los Angeles, SYD-Sydney]",8720
3,2,"[DFW-Dallas, PSP-Palm Springs, LAX-Los Angeles...",8721
4,2,"[DFW-Dallas, PHX-Phoenix, LAX-Los Angeles, SYD...",8724
...,...,...,...
95,2,"[DFW-Dallas, FAT-Fresno, LAX-Los Angeles, SYD-...",9008
96,2,"[DFW-Dallas, LCH-Lake Charles, IAH-Houston, SY...",9013
97,2,"[DFW-Dallas, ICT-Wichita, LAX-Los Angeles, SYD...",9017
98,2,"[DFW-Dallas, AUS-Austin, HNL-Honolulu, SYD-Syd...",9018
