In [1]:
import os

from neo4j.v1 import GraphDatabase
# Connection settings are read from the environment when present so that real
# credentials never have to be committed with the notebook. The fallbacks are
# the original local-development values, so an unconfigured run behaves as before.
NEO4J_URI = os.environ.get('NEO4J_URI', 'bolt://localhost:7687')
NEO4J_USER = os.environ.get('NEO4J_USER', 'neo4j')
NEO4J_PASSWORD = os.environ.get('NEO4J_PASSWORD', 'password')

# Single shared driver used by every query helper in this notebook.
driver = GraphDatabase.driver(NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))

In [2]:
def run_query(query, parameters=None):
    """Execute a Cypher statement for its side effects and discard any rows.

    Args:
        query: Cypher statement text.
        parameters: Optional dict of query parameters forwarded to the driver.

    Returns:
        None.
    """
    with driver.session() as session:
        # The driver streams results lazily. Consuming the result makes the
        # statement run to completion here, so a server-side failure is raised
        # at the call site instead of being deferred to session teardown.
        session.run(query, parameters).consume()

## Constraints

In [3]:
# Enforce uniqueness of the `id` property on :Stop nodes before any data is
# loaded; the node import MERGEs on this same property.
spot_constraint_query = "CREATE CONSTRAINT ON (m:Stop) ASSERT m.id IS UNIQUE;"
run_query(spot_constraint_query)

## Import data

In [4]:
# Cypher that creates one :Stop node per row of network_nodes.csv
# (semicolon-delimited, with headers). Nodes are MERGEd on `id`, taken from the
# `stop_I` column; the remaining columns are copied onto the node as properties
# (apoc.map.clean is given 'stop_I' as the key to drop from the row map).
import_nodes_query = """

LOAD CSV WITH HEADERS FROM "file:///network_nodes.csv" as row FIELDTERMINATOR ";"
MERGE (s:Stop{id:row.stop_I})
SET s+=apoc.map.clean(row,['stop_I'],[])

"""

In [5]:
# Execute the node import; run_query returns nothing, so the cell has no output.
run_query(import_nodes_query)

In [6]:
# Cypher that creates the relationships between stops. For each transport mode
# it reads network_<mode>.csv (semicolon-delimited, with headers), looks up both
# endpoint :Stop nodes by `id`, and creates a relationship whose type is the
# upper-cased mode name. apoc.create.relationship is used so the type can be a
# computed value. `distance` is read from column `d` (as an integer) and
# `duration_avg` from the column of the same name (as a float).
# RETURN distinct collapses the result to a single 'done' row.
import_rels_query = """

UNWIND ['walk','bus','tram','rail','subway'] as mode
LOAD CSV WITH HEADERS FROM "file:///network_" + mode + ".csv" as row FIELDTERMINATOR ";"
MATCH (from:Stop{id:row.from_stop_I}),(to:Stop{id:row.to_stop_I})
CALL apoc.create.relationship(from, toUpper(mode),
{distance:toInteger(row.d),duration_avg:toFloat(row.duration_avg)}, to) YIELD rel
RETURN distinct 'done'

"""

In [7]:
# Execute the relationship import for all five transport modes.
run_query(import_rels_query)

## Preprocess attributes

In [8]:
# Cypher that (re)computes `duration_avg` on every WALK relationship as
# distance / walking_speed.
# NOTE(review): 1.38889 looks like 5 km/h expressed in m/s, which assumes
# `distance` is in meters and durations are in seconds — confirm against the
# dataset documentation.
walking_duration_calculation = """
WITH 1.38889 as walking_speed
MATCH (:Stop)-[w:WALK]->()
SET w.duration_avg = toFloat(w.distance) / walking_speed
"""

In [9]:
# Write the computed walking durations onto the WALK relationships.
run_query(walking_duration_calculation)

# Graph algorithms

## Load the graph

In [10]:
# Cypher that loads a named graph 'rome' for the graph-algorithms procedures.
# It contains :Stop nodes and the five relationship types listed below.
# Two relationship weights are exposed: `distance` (from the `distance`
# property) and `duration` (from the `duration_avg` property).
# duplicateRelationships is set to 'min'.
# NOTE(review): presumably 'min' keeps the smallest weight when several
# relationships connect the same pair of stops — confirm in the library docs.
load_algo_graph = """
CALL algo.graph.load('rome','Stop',
    'BUS | RAIL | SUBWAY | TRAM | WALK',
    { 
       duplicateRelationships:'min',
       relationshipProperties:{ 
          distance:{ 
             property:'distance'
          },
          duration:{ 
             property:'duration_avg'
          }
       }
    })
"""

In [11]:
# Load the named graph 'rome' so the algorithm calls can refer to it by name.
run_query(load_algo_graph)

## PageRank

In [12]:
import pandas as pd

def read_query(query):
    """Run a Cypher query and return all of its rows as a pandas DataFrame.

    Args:
        query: Cypher statement text.

    Returns:
        A DataFrame with one row per returned record; the column labels are
        the keys reported by the query result.
    """
    with driver.session() as db_session:
        cursor = db_session.run(query)
        # Materialise every record while the session is still open.
        rows = []
        for record in cursor:
            rows.append(record.values())
        return pd.DataFrame(rows, columns=cursor.keys())
    

In [13]:
# PageRank restricted to a single relationship type (TRAM) on the named graph
# 'rome'. Scores are streamed, the five highest are kept, and node ids are
# resolved to stop names for display.
pagerank_single_rel = """
CALL algo.pageRank.stream('Stop','TRAM',{graph:'rome'})
YIELD nodeId, score
WITH nodeId, score
ORDER BY score DESC LIMIT 5
RETURN algo.asNode(nodeId).name as name, score
"""

In [14]:
# Show the top five stops by TRAM-only PageRank as a DataFrame.
read_query(pagerank_single_rel)

Unnamed: 0,name,score
0,LABICANO/PORTA MAGGIORE,2.088341
1,PRENESTINA/TOR DE' SCHIAVI,1.728012
2,TRASTEVERE/MIN. P.ISTRUZIONE,1.699413
3,PRENESTINA/OLEVANO ROMANO,1.613868
4,TRASTEVERE/BERNARD. DA FELTRE,1.587089


In [15]:
# PageRank across multiple relationship types (TRAM, RAIL and BUS) on the named
# graph 'rome'. The types are given as a '|'-separated list. As in the
# single-type query, only the five highest-scoring stops are returned by name.
pagerank_multi_rel = """
CALL algo.pageRank.stream('Stop','TRAM | RAIL | BUS',{graph:'rome'})
YIELD nodeId, score
WITH nodeId, score
ORDER BY score DESC LIMIT 5
RETURN algo.asNode(nodeId).name as name, score
"""

In [16]:
# Show the top five stops by PageRank over TRAM, RAIL and BUS as a DataFrame.
read_query(pagerank_multi_rel)

Unnamed: 0,name,score
0,LGT SASSIA/S. SPIRITO (H),5.348166
1,TUSCOLANA/ROCCELLA JONICA,4.598942
2,LAURENTINA/DOUHET,4.121394
3,PETROSELLI,4.097599
4,ANAGNINA/CASALE FERRANTI,4.037861


In [17]:
# Connected components with write-back: for each transport mode, run
# algo.unionFind over :Stop nodes and that mode's relationship type, storing the
# component id in a node property named "<mode>_component" (mode lower-cased).
# RETURN distinct collapses the five result rows into a single 'done' row.
# NOTE(review): unlike the PageRank calls, no `graph` option is passed here —
# presumably the procedure projects from the database on each call; confirm.
connected_components_query = """
UNWIND ["BUS","RAIL","SUBWAY","TRAM","WALK"] as mode
CALL algo.unionFind('Stop',mode,{writeProperty:toLower(mode) + "_component"})
YIELD computeMillis
RETURN distinct 'done'
"""

In [18]:
# Write the per-mode component ids onto the :Stop nodes.
run_query(connected_components_query)

In [19]:
# Explore subway network components: group stops by `subway_component` and list
# the ten largest groups, each with its size and up to three example stop names.
# NOTE(review): the recorded output contains size-1 components whose members
# look like non-subway stops. Presumably unionFind wrote the property to every
# :Stop, so `exists(s.subway_component)` does not restrict the result to stops
# on the subway network — confirm.
explore_subway_component_query = """
MATCH (s:Stop)
WHERE exists(s.subway_component)
RETURN s.subway_component as component,
       collect(s.name)[..3] as example_members,
count(*) as size
ORDER BY size DESC
LIMIT 10
"""

In [20]:
# Show the ten largest subway components as a DataFrame.
read_query(explore_subway_component_query)

Unnamed: 0,component,example_members,size
0,7748,"[ANAGNINA, FURIO CAMILLO, PONTE LUNGO]",27
1,7721,"[BATTISTINI, BARBERINI, REPUBBLICA]",27
2,7801,"[LAURENTINA, COLOSSEO, CAVOUR]",26
3,7775,"[REBIBBIA, CASTRO PRETORIO, TERMINI]",26
4,7848,"[PANTANO, GRANITI, FINOCCHIO]",21
5,7827,"[PANTANO, GRANITI, FINOCCHIO]",21
6,6,[Villa Bonelli],1
7,5,[Muratella],1
8,2,[La Storta],1
9,4,[Torricola],1


## Shortest Path

In [21]:
# Shortest path using a single relationship type: from 'Parco Leonardo' to
# 'Roma Trastevere' over RAIL relationships only, weighted by the `distance`
# property of the named graph 'rome'. Each path node is returned with its cost,
# labelled here as meters.
shortest_path_single_rel_query = """
MATCH (start:Stop{name:'Parco Leonardo'}),(end:Stop{name:'Roma Trastevere'})
CALL algo.shortestPath.stream(start,end,'distance',{graph:'rome',relationshipQuery:'RAIL'})
YIELD nodeId,cost
RETURN algo.asNode(nodeId).name as name, cost as meters
"""

In [22]:
# Show the RAIL-only shortest path, one row per stop along the route.
read_query(shortest_path_single_rel_query)

Unnamed: 0,name,meters
0,Parco Leonardo,0.0
1,Fiera di Roma,2217.0
2,Ponte Galeria,4537.0
3,Muratella,9886.0
4,Magliana,12020.0
5,Villa Bonelli,14529.0
6,Roma Trastevere,17403.0


In [23]:
# Shortest path using multiple relationship types: from
# 'LABICANO/PORTA MAGGIORE' to 'TARDINI', allowing both WALK and RAIL
# relationships and weighting by the `distance` property of the named graph
# 'rome'. Each path node is returned with its cost.
shortest_path_multi_rel_query = """
MATCH (start:Stop{name:'LABICANO/PORTA MAGGIORE'}),(end:Stop{name:'TARDINI'})
CALL algo.shortestPath.stream(start,end,'distance',{graph:'rome',relationshipQuery:'WALK | RAIL'})
YIELD nodeId, cost
RETURN algo.asNode(nodeId).name as name, cost 
"""

In [24]:
# Show the WALK + RAIL shortest path by distance, one row per stop.
read_query(shortest_path_multi_rel_query)

Unnamed: 0,name,cost
0,LABICANO/PORTA MAGGIORE,0.0
1,PORTA MAGGIORE,67.0
2,Termini Laziali,1002.0
3,Roma San Pietro,5296.0
4,GREGORIO VII/STAZ. S. PIETRO (FS),5676.0
5,AURELIA/PAOLO III,6239.0
6,VALLE AURELIA (MA),6727.0
7,PATETTA/D'AMELIO,7420.0
8,TARDINI,7906.0


In [25]:
# Shortest path by duration: same endpoints and relationship types (WALK and
# RAIL) as the distance-based query, but weighted by the `duration` property of
# the named graph 'rome'. The cost is divided by 60 and reported as minutes.
# NOTE(review): this assumes the stored durations are in seconds — confirm.
shortest_path_duration_query = """
MATCH (start:Stop{name:'LABICANO/PORTA MAGGIORE'}),(end:Stop{name:'TARDINI'})
CALL algo.shortestPath.stream(start,end,'duration',{graph:'rome',relationshipQuery:'WALK | RAIL'})
YIELD nodeId, cost
RETURN algo.asNode(nodeId).name as name, cost / 60 as minutes
"""

In [26]:
# Show the WALK + RAIL shortest path by travel time, one row per stop.
read_query(shortest_path_duration_query)

Unnamed: 0,name,minutes
0,LABICANO/PORTA MAGGIORE,0.0
1,PORTA MAGGIORE,0.803999
2,S. BIBIANA,2.670666
3,TERMINI LAZIALI,4.653999
4,Termini Laziali,4.797999
5,Roma San Pietro,17.297999
6,Valle Aurelia,21.321529
7,STAMPINI,28.773523
8,TARDINI,36.969516
