# Communities algorithm

This notebook creates a simplified Bart transit graph.

The weights are computed based on travel times and ridership.

Then we run Louvain Modularity to calculate communities.

# Helper Code

In [27]:
import neo4j

import csv

import math
import numpy as np
import pandas as pd

import psycopg2

In [28]:
driver = neo4j.GraphDatabase.driver(uri="neo4j://neo4j:7687", auth=("neo4j","ucb_mids_w205"))

In [29]:
session = driver.session(database="neo4j")

In [30]:
def my_neo4j_wipe_out_database():
    "wipe out database by deleting all nodes and relationships"
    
    query = "match (node)-[relationship]->() delete node, relationship"
    session.run(query)
    
    query = "match (node) delete node"
    session.run(query)

In [31]:
def my_neo4j_run_query_pandas(query, **kwargs):
    "run a query and return the results in a pandas dataframe"
    
    result = session.run(query, **kwargs)
    
    df = pd.DataFrame([r.values() for r in result], columns=result.keys())
    
    return df

In [32]:
def my_neo4j_number_nodes_relationships():
    "print the number of nodes and relationships"
   
    
    query = """
        match (n) 
        return n.name as node_name, labels(n) as labels
        order by n.name
    """
    
    df = my_neo4j_run_query_pandas(query)
    
    number_nodes = df.shape[0]
    
    
    query = """
        match (n1)-[r]->(n2) 
        return n1.name as node_name_1, labels(n1) as node_1_labels, 
            type(r) as relationship_type, n2.name as node_name_2, labels(n2) as node_2_labels
        order by node_name_1, node_name_2
    """
    
    df = my_neo4j_run_query_pandas(query)
    
    number_relationships = df.shape[0]
    
    print("-------------------------")
    print("  Nodes:", number_nodes)
    print("  Relationships:", number_relationships)
    print("-------------------------")


In [33]:
def my_neo4j_create_node(station_name):
    "create a node with label Station"
    
    query = """
    
    CREATE (:Station {name: $station_name})
    
    """
    
    session.run(query, station_name=station_name)
    

In [34]:
def my_neo4j_create_relationship_one_way(from_station, to_station, weight):
    "create a relationship one way between two stations with a weight"
    
    query = """
    
    MATCH (from:Station), 
          (to:Station)
    WHERE from.name = $from_station and to.name = $to_station
    CREATE (from)-[:LINK {weight: $weight}]->(to)
    
    """
    
    session.run(query, from_station=from_station, to_station=to_station, weight=weight)
    

In [35]:
def my_neo4j_create_relationship_two_way(from_station, to_station, weight):
    "create relationships two way between two stations with a weight"
    
    query = """
    
    MATCH (from:Station), 
          (to:Station)
    WHERE from.name = $from_station and to.name = $to_station
    CREATE (from)-[:LINK {weight: $weight}]->(to),
           (to)-[:LINK {weight: $weight}]->(from)
    
    """
    
    session.run(query, from_station=from_station, to_station=to_station, weight=weight)
    

In [36]:
connection = psycopg2.connect(
    user = "postgres",
    password = "ucb",
    host = "postgres",
    port = "5432",
    database = "postgres"
)

In [37]:
cursor = connection.cursor()

# Create 1 node for each station

In [38]:
my_neo4j_wipe_out_database()

In [39]:
connection.rollback()

query = """

select station
from stations
order by station

"""

cursor.execute(query)

connection.rollback()

rows = cursor.fetchall()

for row in rows:
    station = row[0]
    my_neo4j_create_node(station)

In [40]:
 my_neo4j_number_nodes_relationships() 

-------------------------
  Nodes: 50
  Relationships: 0
-------------------------


# Create 1 relationship between stations

In [41]:
def get_normalized_exits_for_station(station):
    query = f"""

    SELECT normalized_exits
    FROM ridership
    WHERE station_name = '{station}'
    ORDER by 1

    """

    cursor.execute(query)

    connection.rollback()

    rows = cursor.fetchall()
    if not rows:
        print(station)
    return rows[0][0]

In [42]:
def get_station_weight(normalized_exits, time):
    return normalized_exits * 1000

In [43]:
connection.rollback()

query = """

SELECT station_1, station_2, travel_time
FROM travel_times
ORDER by 1, 2, 3

"""

cursor.execute(query)

connection.rollback()

rows = cursor.fetchall()

for row in rows:
    station_1 = row[0]
    station_2 = row[1]
    travel_time = int(row[2])
    normalized_exits = get_normalized_exits_for_station(station_1)

    my_neo4j_create_relationship_two_way(
        station_1,
        station_2,
        get_station_weight(normalized_exits, travel_time)
    )

In [44]:
 my_neo4j_number_nodes_relationships() 

-------------------------
  Nodes: 50
  Relationships: 102
-------------------------


In [25]:
get_exits_for_station("Ashby")

2077


# Run Community Detection Algorithm

In [45]:
query = "CALL gds.graph.drop('ds_graph', false) yield graphName"
session.run(query)

query = "CALL gds.graph.project('ds_graph', 'Station', 'LINK', {relationshipProperties: 'weight'})"
session.run(query)

<neo4j._sync.work.result.Result at 0x7f69ca2033d0>

In [50]:
query = """

CALL gds.louvain.stream('ds_graph', {includeIntermediateCommunities: true})
YIELD nodeId, communityId, intermediateCommunityIds
RETURN gds.util.asNode(nodeId).name AS Name, communityId as community, intermediateCommunityIds as intermediate_community
ORDER BY community, Name ASC

"""

df = my_neo4j_run_query_pandas(query)
df


Unnamed: 0,Name,community,intermediate_community
0,Ashby,2,"[15, 2]"
1,Downtown Berkeley,2,"[15, 2]"
2,El Cerrito Plaza,2,"[38, 2]"
3,El Cerrito del Norte,2,"[38, 2]"
4,North Berkeley,2,"[15, 2]"
5,Richmond,2,"[38, 2]"
6,16th Street Mission,10,"[3, 10]"
7,24th Street Mission,10,"[3, 10]"
8,Civic Center,10,"[10, 10]"
9,Glen Park,10,"[3, 10]"


# Set Community as Attribute

In [51]:
def my_neo4j_set_node_community(station_name, community):
    "sets a community attribute on a node with the given station name"
    
    query = """
    
    MATCH (s:Station {name: $station_name})
    SET s.community = $community
    
    """
    
    session.run(query, station_name=station_name, community=community)

In [52]:
for _, row in df.iterrows():
    station = row[0]
    community = row[1]
    my_neo4j_set_node_community(station, community)

In [55]:
def my_neo4j_set_node_community_label(station_name, community):
    "sets a community label on a node with the given station name"
    
    query = f"""
    
    MATCH (s:Station {{name: $station_name}})
    SET s:Community_{community}
    
    """
    
    session.run(query, station_name=station_name, community=community)

In [56]:
for _, row in df.iterrows():
    station = row[0]
    community = row[1]
    my_neo4j_set_node_community_label(station, community)