In [19]:
import csv
import math
import os

import neo4j
import numpy as np
import pandas as pd
import psycopg2

In [20]:
#
# function to run a select query and return rows in a pandas dataframe
# pandas puts all numeric values from postgres to float
# if it will fit in an integer, change it to integer
#

def my_select_query_pandas(query, rollback_before_flag, rollback_after_flag):
    """Run a select query and return the rows in a pandas dataframe.

    The query is executed on the module-level ``connection``.

    Args:
        query: SQL text passed to ``pd.read_sql_query``.
        rollback_before_flag: when truthy, roll the connection back before
            running the query.
        rollback_after_flag: when truthy, roll the connection back after the
            query, including when the query raises.

    Returns:
        The query result as a dataframe.  Every float64 column whose non-null
        values are all finite whole numbers inside the int64 range is
        converted to the nullable ``Int64`` dtype; other columns are untouched.
    """

    if rollback_before_flag:
        connection.rollback()

    try:
        df = pd.read_sql_query(query, connection)
    finally:
        # roll back even if the query failed, so the connection is not left
        # inside a failed transaction
        if rollback_after_flag:
            connection.rollback()

    # fix the float columns that really should be integers

    for column in df:

        if df[column].dtype != "float64":
            continue

        present = df[column].dropna()

        # infinities and values outside the int64 range cannot be stored in
        # an Int64 column, so such columns stay float
        fits_integer = (
            np.isfinite(present)
            & (present == np.floor(present))
            & (present >= -2.0 ** 63)
            & (present < 2.0 ** 63)
        )

        if fits_integer.all():
            df[column] = df[column].astype('Int64')

    return df
    

In [21]:
# Neo4j connection settings.  Each value can be overridden through an
# environment variable so credentials do not have to be edited in the
# notebook; the defaults are the values this notebook has always used.
NEO4J_URI = os.environ.get("NEO4J_URI", "neo4j://neo4j:7687")
NEO4J_USER = os.environ.get("NEO4J_USER", "neo4j")
NEO4J_PASSWORD = os.environ.get("NEO4J_PASSWORD", "ucb_mids_w205")
NEO4J_DATABASE = os.environ.get("NEO4J_DATABASE", "neo4j")

# module-level driver and session shared by every my_neo4j_* helper below
driver = neo4j.GraphDatabase.driver(uri=NEO4J_URI, auth=(NEO4J_USER, NEO4J_PASSWORD))
session = driver.session(database=NEO4J_DATABASE)



In [22]:
def my_neo4j_wipe_out_database():
    """Wipe out the database by deleting all nodes and relationships.

    Runs on the module-level ``session``.  A single ``detach delete`` removes
    every node together with its relationships, and the result is consumed so
    the deletion has finished by the time this function returns.
    """

    query = "match (node) detach delete node"
    session.run(query).consume()
    
def my_neo4j_run_query_pandas(query, **kwargs):
    """Execute a query on the module-level session and return a dataframe.

    Keyword arguments are forwarded to ``session.run`` as query parameters.
    Each returned record becomes one row; the column names are the result keys.
    """

    records = session.run(query, **kwargs)

    # read every record first, then ask the result for its column names
    rows = [record.values() for record in records]
    column_names = records.keys()

    return pd.DataFrame(rows, columns=column_names)

def my_neo4j_number_nodes_relationships():
    """Print the number of nodes and relationships in the database.

    The counts are computed by the database with ``count(...)``, so only one
    row per query is transferred instead of every node and relationship.
    Queries go through ``my_neo4j_run_query_pandas``.
    """

    node_counts = my_neo4j_run_query_pandas(
        "match (n) return count(n) as number_nodes"
    )
    number_nodes = int(node_counts.iloc[0, 0])

    # each directed relationship matches this pattern exactly once
    relationship_counts = my_neo4j_run_query_pandas(
        "match ()-[r]->() return count(r) as number_relationships"
    )
    number_relationships = int(relationship_counts.iloc[0, 0])

    print("-------------------------")
    print("  Nodes:", number_nodes)
    print("  Relationships:", number_relationships)
    print("-------------------------")

def my_neo4j_create_node(station_name):
    """Create one node labelled Station whose name property is station_name.

    The statement runs on the module-level session.
    """

    cypher = """
    
    CREATE (:Station {name: $station_name})
    
    """

    parameters = {"station_name": station_name}

    session.run(cypher, **parameters)

def my_neo4j_create_relationship_one_way(from_station, to_station, weight):
    """Create a single LINK relationship from one station to another.

    Both stations are looked up by name among the Station nodes; the new
    relationship points from from_station to to_station and stores weight in
    its weight property.  The statement runs on the module-level session.
    """

    cypher = """
    
    MATCH (from:Station), 
          (to:Station)
    WHERE from.name = $from_station and to.name = $to_station
    CREATE (from)-[:LINK {weight: $weight}]->(to)
    
    """

    parameters = {
        "from_station": from_station,
        "to_station": to_station,
        "weight": weight,
    }

    session.run(cypher, **parameters)
    
def my_neo4j_create_relationship_two_way(from_station, to_station, weight):
    """Create a pair of LINK relationships, one in each direction.

    Both stations are looked up by name among the Station nodes; each of the
    two new relationships stores weight in its weight property.  The
    statement runs on the module-level session.
    """

    cypher = """
    
    MATCH (from:Station), 
          (to:Station)
    WHERE from.name = $from_station and to.name = $to_station
    CREATE (from)-[:LINK {weight: $weight}]->(to),
           (to)-[:LINK {weight: $weight}]->(from)
    
    """

    parameters = {
        "from_station": from_station,
        "to_station": to_station,
        "weight": weight,
    }

    session.run(cypher, **parameters)
    

In [33]:
# Postgres connection.  Each setting can be overridden through the matching
# libpq-style environment variable so credentials do not have to be edited in
# the notebook; the defaults are the values this notebook has always used.
connection = psycopg2.connect(
    user=os.environ.get("PGUSER", "postgres"),
    password=os.environ.get("PGPASSWORD", "ucb"),
    host=os.environ.get("PGHOST", "postgres"),
    port=os.environ.get("PGPORT", "5432"),
    database=os.environ.get("PGDATABASE", "postgres"),
)

cursor = connection.cursor()

# start from an empty graph, then print the counts to confirm it
my_neo4j_wipe_out_database()

my_neo4j_number_nodes_relationships()

-------------------------
  Nodes: 0
  Relationships: 0
-------------------------


## 1. Create station nodes

In [34]:
connection.rollback()

query = """

select station
from stations
order by station

"""

cursor.execute(query)

# fetch before rolling back, so the rows do not depend on the cursor having
# buffered the whole result on the client
rows = cursor.fetchall()

connection.rollback()

# one Station node per row returned by the query
for row in rows:

    station = row[0]

    my_neo4j_create_node(station)
    

## 2. Create connections between stations, and from each station to Ashby, with costs

connection.rollback()

# NOTE(review): estimate_cost_per_mile is not defined in any cell visible
# here; it must be assigned before this cell runs -- confirm where it is set.

# "if not exists" keeps the cell re-runnable when the extensions are already
# installed; the cost-per-mile factor is passed as a bound parameter instead
# of being formatted into the SQL text.
query = """

create extension if not exists cube;
create extension if not exists earthdistance;

SELECT 
    station_1, 
    station_2,
    (point(s1.longitude,s1.latitude) <@> point(s2.longitude,s2.latitude)) * %s as cost
FROM travel_times
JOIN stations s1 ON s1.station = station_1
JOIN stations s2 ON s2.station = station_2

"""

cursor.execute(query, (estimate_cost_per_mile,))

# fetch before rolling back, so the rows do not depend on the cursor having
# buffered the whole result on the client
rows = cursor.fetchall()

connection.rollback()

# one pair of LINK relationships (both directions) per row of travel_times
for row in rows:

    station_1, station_2, cost = row

    my_neo4j_create_relationship_two_way(station_1, station_2, cost)
    

In [35]:
connection.rollback()

# https://www.bart.gov/sites/default/files/docs/F%26S%20WS%203_%202017%20ENGL.pdf

# (from station, weight) for a one-way LINK from that station to Ashby.
# The weights presumably come from the fare chart linked above -- confirm.
# NOTE(review): the relationship is only created when a Station node with
# exactly this name exists; names such as 'Pleasant Hills' and 'MillBrae'
# should be checked against the stations table.
fares_to_ashby = [
    ('Downtown Berkeley', 1.95),
    ('North Berkeley', 1.95),
    ('El Cerrito Plaza', 1.95),
    ('El Cerrito del Norte', 1.95),
    ('Richmond', 1.95),
    ('Lake Merritt', 1.95),
    ('Fruitvale', 2.15),
    ('Coliseum', 2.45),
    ('OAK Airport', 8.45),
    ('San Leandro', 2.90),
    ('Bay Fair', 3.25),
    ('Hayward', 3.50),
    ('South Hayward', 3.80),
    ('Union City', 4.30),
    ('Fremont', 4.65),
    ('Warm Springs', 5.10),
    ('Castro Valley', 3.50),
    ('West Dublin', 4.45),
    ('Dublin', 4.65),

    ('Pittsburg', 4.75),
    ('North Concord', 4.20),
    ('Concord', 4.00),
    ('Pleasant Hills', 3.60),
    ('Walnut Creek', 3.45),
    ('Lafayette', 2.95),
    ('Orinda', 2.35),
    ('Rockridge', 1.95),
    ('MacArthur', 1.95),
    ('19th Street', 1.95),
    ('12th Street', 1.95),
    ('West Oakland', 1.95),
    ('Embarcadero', 3.85),
    ('Montgomery', 3.85),
    ('Powell', 3.85),
    ('Civic Center', 3.85),
    ('16th Street Mission', 4.05),
    ('24th Street Mission', 4.10),
    ('Glen Park', 4.25),
    ('Balboa Park', 4.30),
    ('Daly City', 4.45),
    ('Colma', 4.40),
    ('South San Francisco', 4.60),
    ('San Bruno', 4.85),
    ('SFO Airport', 9.45),
    ('MillBrae', 5.15),
]

# same calls, in the same order, as the original one-call-per-line version
for from_station, fare in fares_to_ashby:
    my_neo4j_create_relationship_one_way(from_station, 'Ashby', fare)

In [36]:
def my_neo4j_cost_estimate(from_station, to_station):
    """Print the cheapest-path cost estimate between two stations.

    Drops and re-projects the 'ds_graph' graph (Station nodes, LINK
    relationships, 'weight' property), then streams the Dijkstra shortest path
    from from_station to to_station.  For every returned path it prints the
    total cost followed by one line per station on the path showing the
    station name, the cost of the step into it, and the running total.
    All statements run on the module-level session.
    """

    drop_projection = "CALL gds.graph.drop('ds_graph', false) yield graphName"
    build_projection = "CALL gds.graph.project('ds_graph', 'Station', 'LINK', {relationshipProperties: 'weight'})"
    shortest_path = """

    MATCH (source:Station {name: $source}), (target:Station {name: $target})
    CALL gds.shortestPath.dijkstra.stream(
        'ds_graph', 
        { sourceNode: source, 
          targetNode: target, 
          relationshipWeightProperty: 'weight'
        }
    )
    YIELD index, sourceNode, targetNode, totalCost, nodeIds, costs, path
    RETURN
        gds.util.asNode(sourceNode).name AS from,
        gds.util.asNode(targetNode).name AS to,
        totalCost,
        [nodeId IN nodeIds | gds.util.asNode(nodeId).name] AS nodes,
        costs
    ORDER BY index

    """

    # rebuild the projection so it reflects the current graph contents
    session.run(drop_projection)
    session.run(build_projection)

    for record in session.run(shortest_path, source=from_station, target=to_station):

        total_cost = record['totalCost']

        print("\n--------------------------------")
        print("   Total Cost: ", f'${total_cost:.2f}')
        print("--------------------------------")

        station_names = record['nodes']
        cumulative_costs = record['costs']

        cost_so_far = 0

        # costs holds running totals, so the step cost is the difference
        # from the previous station's running total
        for position, station_name in enumerate(station_names):

            running_total = cumulative_costs[position]
            step_cost = running_total - cost_so_far

            print(station_name + ", " + f'${step_cost:.2f}' + ", " + f'${running_total:.2f}')

            cost_so_far = running_total
    

In [37]:
# cost estimate for a trip from Orinda to Ashby
my_neo4j_cost_estimate(from_station='Orinda', to_station='Ashby')


--------------------------------
   Total Cost:  $2.35
--------------------------------
Orinda, $0.00, $0.00
Ashby, $2.35, $2.35
