In [2]:
import csv
import os

import neo4j
import numpy as np
import pandas as pd
import psycopg2

In [3]:
# Open the Neo4j connection used by the rest of the notebook.
# Each setting can be overridden through an environment variable so that
# credentials do not have to be edited into the notebook; the fallbacks are the
# values this notebook was originally written with.
driver = neo4j.GraphDatabase.driver(
    uri=os.environ.get("NEO4J_URI", "neo4j://neo4j:7687"),
    auth=(
        os.environ.get("NEO4J_USER", "neo4j"),
        os.environ.get("NEO4J_PASSWORD", "w205"),
    ),
)
# Module-level session; my_neo4j_shortest_path() runs its queries through it.
session = driver.session(database=os.environ.get("NEO4J_DATABASE", "neo4j"))

In [22]:
def my_neo4j_shortest_path(from_station, to_station):
    """Return the Dijkstra shortest-path record between two Station nodes.

    The in-memory GDS graph 'ds_graph' is dropped (if present) and re-projected
    from the 'Station' nodes and 'LINK' relationships on every call, then
    gds.shortestPath.dijkstra.stream is run over it using the 'weight'
    relationship property.

    Parameters
    ----------
    from_station : str
        Value of the `name` property of the source Station node.
    to_station : str
        Value of the `name` property of the target Station node.

    Returns
    -------
    The first record of the query result, exposing the fields
    `from`, `to`, `totalCost`, `nodes` and `costs`.

    Raises
    ------
    IndexError
        If the query returns no rows (for example because one of the names does
        not match a Station node, or no path exists). IndexError is kept so the
        failure type matches what indexing an empty result list used to raise.
    """
    # The second argument (false) makes the drop a no-op when the graph does
    # not exist yet. consume() forces the statement to finish here so any
    # failure surfaces at this line rather than at a later query.
    session.run("CALL gds.graph.drop('ds_graph', false)").consume()

    session.run(
        "CALL gds.graph.project('ds_graph', 'Station', 'LINK', {relationshipProperties: 'weight'})"
    ).consume()

    query = """

    MATCH (source:Station {name: $source}), (target:Station {name: $target})
    CALL gds.shortestPath.dijkstra.stream(
        'ds_graph',
        { sourceNode: source,
          targetNode: target,
          relationshipWeightProperty: 'weight'
        }
    )
    YIELD index, sourceNode, targetNode, totalCost, nodeIds, costs, path
    RETURN
        gds.util.asNode(sourceNode).name AS from,
        gds.util.asNode(targetNode).name AS to,
        totalCost,
        [nodeId IN nodeIds | gds.util.asNode(nodeId).name] AS nodes,
        costs
    ORDER BY index

    """

    # Station names are passed as query parameters, never interpolated.
    records = list(session.run(query, source=from_station, target=to_station))

    if not records:
        raise IndexError(
            f"no shortest path found from {from_station!r} to {to_station!r}"
        )

    return records[0]

In [23]:
# Load the 'station' column of stations.csv into a NumPy array and display it.
stations = np.array(
    pd.read_csv("stations.csv").loc[:, "station"]
)
stations

array(['12th Street', '16th Street Mission', '19th Street',
       '24th Street Mission', 'Antioch', 'Ashby', 'Balboa Park',
       'Bay Fair', 'Berryessa', 'Castro Valley', 'Civic Center',
       'Coliseum', 'Colma', 'Concord', 'Daly City', 'Downtown Berkeley',
       'Dublin', 'El Cerrito del Norte', 'El Cerrito Plaza',
       'Embarcadero', 'Fremont', 'Fruitvale', 'Glen Park', 'Hayward',
       'Lafayette', 'Lake Merritt', 'MacArthur', 'Millbrae', 'Milpitas',
       'Montgomery Street', 'North Berkeley', 'North Concord', 'OAK',
       'Orinda', 'Pittsburg', 'Pittsburg Center', 'Pleasant Hill',
       'Powell Street', 'Richmond', 'Rockridge', 'San Bruno',
       'San Leandro', 'SFO', 'South Hayward', 'South San Francisco',
       'Union City', 'Walnut Creek', 'Warm Springs', 'West Dublin',
       'West Oakland'], dtype=object)

In [39]:
def construct_tree(distribution_centers, station_names=None):
    """Build the table of hops that serve every station from its cheapest centre.

    For each station, the shortest path from every distribution centre is
    computed with my_neo4j_shortest_path('depart <centre>', 'arrive <station>').
    The path with the lowest `totalCost` is kept (the earliest centre wins a
    tie), and every consecutive pair of nodes on it is recorded as one row.

    Parameters
    ----------
    distribution_centers : iterable of str
        Station names that act as distribution centres. Must not be empty.
    station_names : iterable of str, optional
        Stations to serve. When omitted, the notebook-level `stations` array
        is used, which was the only behaviour before this parameter existed.

    Returns
    -------
    pandas.DataFrame
        Columns "Origin", "Destination" and "Cost", with exact duplicate rows
        removed. The index keeps its pre-deduplication labels.

    Raises
    ------
    ValueError
        If `distribution_centers` is empty.

    Notes
    -----
    NOTE(review): "Cost" is taken from the path record's `costs[i]` for the
    origin node of each hop. In GDS that list appears to hold the cost
    accumulated along the path, so the value looks like cost-so-far at the
    origin rather than the weight of the hop itself. Confirm that this is the
    intended meaning before relying on it.

    Every call to my_neo4j_shortest_path re-projects the GDS graph, so this
    function issues len(distribution_centers) * len(station_names) projections.
    """
    centers = list(distribution_centers)
    if not centers:
        raise ValueError("distribution_centers must contain at least one station")

    targets = stations if station_names is None else station_names

    origin_point = []
    destination_point = []
    cost = []

    for station in targets:
        candidates = [
            my_neo4j_shortest_path('depart ' + dc, 'arrive ' + station)
            for dc in centers
        ]
        # min() returns the first minimal element, matching the original
        # strict '>' comparison that kept the earliest centre on ties.
        best_path = min(candidates, key=lambda rec: rec['totalCost'])

        nodes = best_path['nodes']
        costs = best_path['costs']
        for i in range(len(nodes) - 1):
            origin_point.append(nodes[i])
            destination_point.append(nodes[i + 1])
            cost.append(costs[i])

    connections_df = pd.DataFrame(
        {"Origin": origin_point, "Destination": destination_point, "Cost": cost}
    )
    return connections_df.drop_duplicates()

In [40]:
construct_tree(['Glen Park','MacArthur','Pittsburg Center','Union City'])

Unnamed: 0,Origin,Destination,Cost
0,depart MacArthur,yellow MacArthur,0.0
1,yellow MacArthur,yellow 19th Street,0.0
2,yellow 19th Street,yellow 12th Street,180.0
3,yellow 12th Street,arrive 12th Street,300.0
4,depart Glen Park,green Glen Park,0.0
...,...,...,...
248,blue West Dublin,arrive West Dublin,1683.0
250,red MacArthur,red 19th Street,0.0
251,red 19th Street,red 12th Street,180.0
252,red 12th Street,red West Oakland,300.0
