In [1]:
import neo4j

import csv

import math
import numpy as np
import pandas as pd

import psycopg2

In [3]:
driver = neo4j.GraphDatabase.driver(uri="neo4j://neo4j:7687", auth=("neo4j","w205"))

session = driver.session(database="neo4j")

def my_neo4j_wipe_out_database():
    "wipe out database by deleting all nodes and relationships"
    
    query = "match (node)-[relationship]->() delete node, relationship"
    session.run(query)
    
    query = "match (node) delete node"
    session.run(query)
    
def my_neo4j_run_query_pandas(query, **kwargs):
    "run a query and return the results in a pandas dataframe"
    
    result = session.run(query, **kwargs)
    
    df = pd.DataFrame([r.values() for r in result], columns=result.keys())
    
    return df

def my_neo4j_number_nodes_relationships():
    "print the number of nodes and relationships"
    
    query = """
        match (n) 
        return n.name as node_name, labels(n) as labels
        order by n.name
    """
    
    df = my_neo4j_run_query_pandas(query)
    
    number_nodes = df.shape[0]
    
    
    query = """
        match (n1)-[r]->(n2) 
        return n1.name as node_name_1, labels(n1) as node_1_labels, 
            type(r) as relationship_type, n2.name as node_name_2, labels(n2) as node_2_labels
        order by node_name_1, node_name_2
    """
    
    df = my_neo4j_run_query_pandas(query)
    
    number_relationships = df.shape[0]
    
    print("-------------------------")
    print("  Nodes:", number_nodes)
    print("  Relationships:", number_relationships)
    print("-------------------------")
    
def my_neo4j_create_node(station_name):
    "create a node with label Station"
    
    query = """
    
    CREATE (:Station {name: $station_name})
    
    """
    
    session.run(query, station_name=station_name)
    
def my_neo4j_create_relationship_one_way(from_station, to_station, weight):
    "create a relationship one way between two stations with a weight"
    
    query = """
    
    MATCH (from:Station), 
          (to:Station)
    WHERE from.name = $from_station and to.name = $to_station
    CREATE (from)-[:LINK {weight: $weight}]->(to)
    
    """
    
    session.run(query, from_station=from_station, to_station=to_station, weight=weight)

In [4]:
#Clearing out neo4j database

my_neo4j_wipe_out_database()
my_neo4j_number_nodes_relationships()

-------------------------
  Nodes: 0
  Relationships: 0
-------------------------


In [5]:
#Listing stations
stations = np.array(pd.read_csv("stations.csv")['station'])
stations

array(['12th Street', '16th Street Mission', '19th Street',
       '24th Street Mission', 'Antioch', 'Ashby', 'Balboa Park',
       'Bay Fair', 'Berryessa', 'Castro Valley', 'Civic Center',
       'Coliseum', 'Colma', 'Concord', 'Daly City', 'Downtown Berkeley',
       'Dublin', 'El Cerrito del Norte', 'El Cerrito Plaza',
       'Embarcadero', 'Fremont', 'Fruitvale', 'Glen Park', 'Hayward',
       'Lafayette', 'Lake Merritt', 'MacArthur', 'Millbrae', 'Milpitas',
       'Montgomery Street', 'North Berkeley', 'North Concord', 'OAK',
       'Orinda', 'Pittsburg', 'Pittsburg Center', 'Pleasant Hill',
       'Powell Street', 'Richmond', 'Rockridge', 'San Bruno',
       'San Leandro', 'SFO', 'South Hayward', 'South San Francisco',
       'Union City', 'Walnut Creek', 'Warm Springs', 'West Dublin',
       'West Oakland'], dtype=object)

In [6]:
#Creating station nodes
for station in stations:
    my_neo4j_create_node(station)
my_neo4j_number_nodes_relationships()

-------------------------
  Nodes: 50
  Relationships: 0
-------------------------


In [7]:
#Listing edges
edges = pd.read_csv("package_influence.csv")
edges

Unnamed: 0,Origin Station,Destination Station,Weight
0,12th Street,19th Street,37006020
1,12th Street,Lake Merritt,12080944
2,12th Street,West Oakland,11325885
3,16th Street Mission,24th Street Mission,20144835
4,16th Street Mission,Civic Center,26065080
...,...,...,...
97,West Dublin,Castro Valley,2612160
98,West Dublin,Dublin,2756529
99,West Oakland,12th Street,18045193
100,West Oakland,Embarcadero,26424384


In [8]:
#Drawing edges
for i in range(len(edges)):
    my_neo4j_create_relationship_one_way(edges['Origin Station'][i], edges['Destination Station'][i], str(edges['Weight'][i]))
    
my_neo4j_number_nodes_relationships()

-------------------------
  Nodes: 50
  Relationships: 102
-------------------------


In [22]:
#Calling PageRank
def page_rank():
    
    query = "CALL gds.graph.drop('ds_graph', false)"
    session.run(query)

    query = "CALL gds.graph.project('ds_graph', 'Station', 'LINK', {relationshipProperties: toInteger('weight')})"
    session.run(query)

    query = """

    CALL gds.pageRank.stream('ds_graph')
    YIELD nodeId, score
    RETURN gds.util.asNode(nodeId).name as name, score
    ORDER BY score DESC
    
    """

    result = session.run(query)
    names = []
    scores = []
    for r in result:
        names.append(r['name'])
        scores.append(r['score'])
    return pd.DataFrame({"Name" : names, "Score" : scores})

In [23]:
page_rank()

Unnamed: 0,Name,Score
0,Bay Fair,1.332615
1,Coliseum,1.33082
2,San Bruno,1.26834
3,MacArthur,1.256488
4,West Oakland,1.160505
5,Lake Merritt,1.157878
6,12th Street,1.15003
7,Pittsburg Center,1.116606
8,Milpitas,1.11467
9,El Cerrito del Norte,1.109565
