In [1]:
# This Jupyter notebook demonstrates how a local Neo4j
# server (running/started in Neo4j Desktop)
# with a populated instance of the geograph model
# (named "geograph" in this example) can be integrated
# into a Python workflow.

In [2]:
from py2neo import Graph,Node,Relationship
import pandas as pd

In [3]:
# Connection settings for the local Neo4j Desktop server: the HTTP
# endpoint and the name of the database holding the geograph model.
NEO4J_URI = "http://localhost:7474"
NEO4J_DATABASE = "geograph"

# Handle used by every later cell to send Cypher to the database.
geograph = Graph(NEO4J_URI, name=NEO4J_DATABASE)

In [4]:
# Cypher: walk every State - County - Censustract path (relationships are
# matched in either direction and with any type), count the
# County-Censustract relationships `r` per state / county-name group, and
# MERGE one BaseView node carrying stateName, countyName and that count
# as startingIndegree.
# NOTE(review): the grouping key here is c.countyName, whereas
# filtered_view_query groups on the County node `c` itself - confirm that
# county names are unique within a state.
base_view_query = '''
MATCH (s:State)-[]-(c:County)-[r]-(t:Censustract)
WITH s, s.stateName AS state, c.countyName AS baseName, count(r) AS baseIndegree
MERGE (n:BaseView { stateName:state, countyName:baseName, startingIndegree:baseIndegree });
'''

In [5]:
# Cypher: same State - County - Censustract path as base_view_query, but
# only tracts passing all three conditions are kept:
#   1. leakyUndergroundStorageTanksPercentile >= 90
#      OR wastewaterDischargePercentile >= 90
#   2. percentOfIndividualsBelow200FederalPovertyLinePercentile >= 65
#   3. at least one of the five loss-rate / flood / fire risk
#      percentiles listed below is >= 90
# The surviving County-Censustract relationships are counted per
# (State node, County node) and stored as finalIndegree on a MERGEd
# FilteredView node, alongside stateName and countyName.
filtered_view_query = '''
MATCH (s:State)-[]-(c:County)-[r]-(t:Censustract)
WHERE
(t.leakyUndergroundStorageTanksPercentile >= 90 OR t.wastewaterDischargePercentile >= 90)
 AND t.percentOfIndividualsBelow200FederalPovertyLinePercentile >= 65
 AND
(t.expectedAgriculturalLossRateNaturalHazardsRiskIndexPercentile >= 90
 OR t.expectedBuildingLossRateNaturalHazardsRiskIndexPercentile >= 90
 OR t.expectedPopulationLossRateNaturalHazardsRiskIndexPercentile >= 90
 OR t.shareOfPropertiesAtRiskOfFloodIn30YearsPercentile >= 90
 OR t.shareOfPropertiesAtRiskOfFireIn30YearsPercentile >= 90)
WITH s, c, count(r) AS indegree
WITH s, s.stateName AS state, c.countyName AS county, indegree
MERGE (n:FilteredView { stateName: state, countyName: county, finalIndegree: indegree });
'''

In [6]:
# Cypher: read-only query that pairs each BaseView node with the
# FilteredView node(s) sharing its stateName and countyName, and returns
# the columns State_Name, County_Name, Initial_Indegree,
# Filtered_Indegree and Proportion (finalIndegree / startingIndegree,
# both cast to float), sorted by Proportion in descending order.
# A BaseView with no matching FilteredView produces no row, because both
# nodes must be matched for the WHERE clause to hold.
table_output_query = '''
MATCH (b:BaseView), (f:FilteredView)
WHERE (b.stateName = f.stateName) AND (b.countyName = f.countyName)
RETURN
b.stateName AS State_Name,
b.countyName AS County_Name,
b.startingIndegree AS Initial_Indegree,
f.finalIndegree AS Filtered_Indegree,
toFloat(f.finalIndegree) / toFloat(b.startingIndegree) AS Proportion
 ORDER BY Proportion DESC;
'''

In [7]:
# Materialise the BaseView and FilteredView helper nodes in the graph.
# Run this cell once per analysis pass, before the table query, and
# remove the helper nodes afterwards with the DETACH DELETE cleanup
# queries.
# NOTE(review): both queries end in MERGE, which binds to an existing
# node when its label and every listed property already match, so a
# re-run against unchanged data should reuse nodes rather than duplicate
# them. If the underlying data changed between runs, new helper nodes
# would be added next to the stale ones - confirm before re-running.
for view_query in (base_view_query, filtered_view_query):
    geograph.evaluate(view_query)

In [8]:
# Safe to re-run at any time: table_output_query only reads from the
# graph and returns rows to the client; it never writes to the graph.
table_cursor = geograph.run(table_output_query)
df = table_cursor.to_data_frame()

In [None]:
# Displaying the dataframe in its own cell lets a query's output be
# verified independently of running that query.
df

In [9]:
# IMPORTANT
# Once the table output has been verified, run this cell to remove the
# BaseView and FilteredView helper nodes (and any relationships attached
# to them) from your local graph instance.
cleanup_query_template = '''
MATCH (n:{label})
DETACH DELETE n;
'''
base_view_cleanup_query = cleanup_query_template.format(label="BaseView")
filtered_view_cleanup_query = cleanup_query_template.format(label="FilteredView")

for cleanup_query in (base_view_cleanup_query, filtered_view_cleanup_query):
    geograph.evaluate(cleanup_query)

In [11]:
# The export lives in its own cell so that EDA, in-depth analysis,
# documentation, etc. can work from the dataframe / CSV file alone,
# without issuing any further queries against the graph instance.
csv_path = "output.csv"
output = pd.DataFrame(data=df)
output.to_csv(csv_path)