In [1]:
import datetime
import os
from datetime import date, timedelta
from getpass import getpass
from random import uniform

import ipywidgets as widgets
import pandas as pd
from ipyleaflet import Map, Heatmap, basemaps, WidgetControl
from neo4j import GraphDatabase

In [2]:
# Bolt endpoint of the Neo4j instance. Override with the NEO4J_URI environment
# variable; the default keeps pointing at a local database.
uri = os.environ.get("NEO4J_URI", "bolt://localhost:7687")

In [3]:
# Credentials are taken from the environment (NEO4J_USER / NEO4J_PASSWORD) so
# that no secret is stored in the notebook. When NEO4J_PASSWORD is not set the
# password is asked for interactively instead.
neo4j_user = os.environ.get("NEO4J_USER", "neo4j")
neo4j_password = os.environ.get("NEO4J_PASSWORD") or getpass("Neo4j password: ")

driver = GraphDatabase.driver(uri, auth=(neo4j_user, neo4j_password))

In [4]:
# Output widget used as a context manager by `on_button_clicked`, which prints
# the current selection into it.
# NOTE(review): no visible cell displays this widget, so the captured text is
# not shown anywhere — confirm whether it should be rendered next to the map.
output = widgets.Output()

In [5]:
def get_complaints(tx, from_date, to_date, offense):
    """Count complaints per Louvain community for a date window and offense list.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        from_date: Inclusive lower bound compared against ``c.complaint_date``.
        to_date: Inclusive upper bound compared against ``c.complaint_date``.
        offense: Collection of offense names matched with ``o.offense in $offense``.

    Returns:
        A list of ``{"community": ..., "complaints": ...}`` dicts, in the order
        the query yields them (descending number of complaints).
    """
    rows = tx.run(
        "MATCH (o:Offense)<-[:COMMITTED_OFFENSE]-(c:Complaint)-[:LOCATED_AT]->(l:Location) \
        WHERE c.complaint_date >= $from_date and c.complaint_date <= $to_date and o.offense in $offense \
        RETURN l.louvain[-1] as community, count(c) as number_of_complaints \
        ORDER BY number_of_complaints desc",
        from_date=from_date,
        to_date=to_date,
        offense=offense,
    )

    # Rename the query's `number_of_complaints` column to `complaints`.
    return [
        {"community": row["community"], "complaints": row["number_of_complaints"]}
        for row in rows
    ]

In [6]:
def get_central_locations(tx, from_date, to_date, offense):
    """Fetch the highest-pagerank location of every community with matching complaints.

    The query first finds, per community (last entry of ``l.louvain``), the
    maximum ``pagerank`` among locations with complaints in the window, then
    returns the coordinates of the locations in that community carrying that
    pagerank value.

    Args:
        tx: Transaction-like object exposing ``run(query, **parameters)``.
        from_date: Inclusive lower bound compared against ``c.complaint_date``.
        to_date: Inclusive upper bound compared against ``c.complaint_date``.
        offense: Collection of offense names matched with ``o.offense in $offense``.

    Returns:
        A list of ``{"latitude": ..., "longitude": ..., "community": ...}`` dicts.
    """
    rows = tx.run(
        "MATCH (o:Offense)<-[:COMMITTED_OFFENSE]-(c:Complaint)-[:LOCATED_AT]->(l:Location) \
        WHERE c.complaint_date >= $from_date and c.complaint_date <= $to_date and o.offense in $offense \
        with l.louvain[-1] as community, max(l.pagerank) as pagerank \
        match(l:Location) \
        where l.pagerank = pagerank and l.louvain[-1] = community \
        return  apoc.convert.toFloat(l.latitude) as latitude, apoc.convert.toFloat(l.longitude) as longitude, community",
        from_date=from_date,
        to_date=to_date,
        offense=offense,
    )

    wanted = ("latitude", "longitude", "community")
    return [{column: row[column] for column in wanted} for row in rows]

In [7]:
def get_offenses(tx):
    """Return the ``offense`` property of every ``Offense`` node.

    Args:
        tx: Transaction-like object exposing ``run(query)``.

    Returns:
        A list with one entry per record, in the order the query yields them.
    """
    records = tx.run(
        "MATCH (o:Offense) \
        return  o.offense as offense")
    return [record["offense"] for record in records]

In [8]:
def get_louvain(tx, query, parameters=None):
    """Run a community-detection query and return its rows as plain dicts.

    Args:
        tx: Transaction-like object exposing ``run(query, parameters)``.
        query: Cypher text whose records expose ``location_id``, ``latitude``,
            ``longitude`` and ``community``.
        parameters: Optional dict of Cypher parameters (``$name`` placeholders in
            ``query``). Passing values this way avoids interpolating them into
            the query text. ``None`` (the default) runs the query as-is.

    Returns:
        A list of dicts holding exactly the four columns above, one per record.
    """
    columns = ("location_id", "latitude", "longitude", "community")
    return [
        {column: record[column] for column in columns}
        for record in tx.run(query, parameters)
    ]

In [9]:
def get_complaints_by_loc(tx, query, parameters=None):
    """Run a per-location complaint count query and return its rows as dicts.

    Args:
        tx: Transaction-like object exposing ``run(query, parameters)``.
        query: Cypher text whose records expose ``location_id`` and
            ``number_of_complaints``.
        parameters: Optional dict of Cypher parameters (``$name`` placeholders in
            ``query``). Passing values this way avoids interpolating them into
            the query text. ``None`` (the default) runs the query as-is.

    Returns:
        A list of ``{"location_id": ..., "number_of_complaints": ...}`` dicts,
        one per record.
    """
    return [
        {
            "location_id": record["location_id"],
            "number_of_complaints": record["number_of_complaints"],
        }
        for record in tx.run(query, parameters)
    ]

In [10]:
# map properties

# [latitude, longitude] of the initial view (presumably New York City — confirm).
center = [40.7128, -74.0060]
zoom = 9.5
# Map shown by the notebook; layers and controls are attached to it in later cells.
m = Map(basemap=basemaps.CartoDB.Positron, center=center, zoom=zoom)
# Empty heat layer created up front.
# NOTE(review): the visible cells never add this object to the map; they build a
# fresh Heatmap on every run instead — confirm whether it is still needed.
heatmap = Heatmap(locations=[], radius=20)

In [11]:
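# NOTE: legacy implementation kept for reference only; the live handlers call calculate_clusters2 instead.
# def calculate_clusters(dates, offenses):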
#     with driver.session() as session:
#         complaints = session.read_transaction(get_complaints, str(dates[0]), str(dates[1]), offenses)
#         complaints_df = pd.DataFrame(complaints)
        
#         locations = session.read_transaction(get_central_locations, dates[0], dates[1], offenses)
#         loc_df = pd.DataFrame(locations)
        
#         df = complaints_df.merge(loc_df, how='outer', on='community')
        
#         try:
#             m.remove_layer()
#         except:
#             pass
        
#         lat_lon = list(df.apply(lambda x: (x['latitude'], x['longitude'],x['complaints']), axis=1))
                
#         m.add_layer(Heatmap(locations=lat_lon, radius=20))
    
    

In [12]:
def _cypher_string(value):
    """Render ``value`` as a single-quoted Cypher string literal."""
    escaped = str(value).replace("\\", "\\\\").replace("'", "\\'")
    return "'" + escaped + "'"


def _cypher_nested(query):
    """Escape ``query`` so it can be embedded in a double-quoted Cypher string."""
    return query.replace("\\", "\\\\").replace('"', '\\"')


def _cluster_heat_points(louvain_rows, complaint_rows):
    """Aggregate per-location rows into ``[latitude, longitude, weight]`` per community."""
    if not louvain_rows or not complaint_rows:
        return []

    louvain_df = pd.DataFrame(louvain_rows).astype({'latitude': float, 'longitude': float})
    complaints_df = pd.DataFrame(complaint_rows)

    # Inner join: only locations present in both result sets contribute.
    merged = louvain_df.merge(complaints_df, on='location_id')
    clusters = merged.groupby('community').agg(
        {'latitude': 'mean', 'longitude': 'mean', 'number_of_complaints': 'sum'}
    )

    # Plain Python floats keep the widget state JSON-serialisable.
    columns = ['latitude', 'longitude', 'number_of_complaints']
    return clusters[columns].astype(float).values.tolist()


def calculate_clusters2(dates, offenses):
    """Run Louvain on the locations matching the selection and draw a heat map.

    Each community becomes one heat point located at the mean latitude and
    longitude of its locations and weighted by its total number of complaints.
    The new ``Heatmap`` layer is added to the module-level map ``m``; existing
    layers are left untouched. When the selection matches nothing, no layer is
    added.

    Args:
        dates: Pair ``(from_date, to_date)``; both ends are compared (inclusive)
            against ``c.complaint_date``.
        offenses: Iterable of offense names to include.
    """
    # The query helpers accept query text only, so the values are embedded as
    # escaped Cypher literals rather than relying on Python's repr() quoting.
    from_date = _cypher_string(dates[0])
    to_date = _cypher_string(dates[1])
    offense_list = "[" + ", ".join(_cypher_string(offense) for offense in offenses) + "]"

    complaint_match = (
        "MATCH (l:Location)<-[:LOCATED_AT]-(c:Complaint)-[:COMMITTED_OFFENSE]->(o:Offense) "
        f"where o.offense in {offense_list} "
        f"and c.complaint_date >= {from_date} and c.complaint_date <= {to_date} "
    )
    node_query = complaint_match + "return distinct id(l) as id"
    relationship_query = (
        "MATCH (c:Complaint)-[:LOCATED_AT]->(l1:Location)-[:LOCATED_NEARBY]->(l2:Location) "
        "return id(l1) as source, id(l2) as target"
    )

    # nodeQuery / relationshipQuery are themselves Cypher strings nested inside
    # the outer statement, hence the second round of escaping.
    louvain_query = (
        "CALL gds.louvain.stream({\n"
        f'    nodeQuery: "{_cypher_nested(node_query)}",\n'
        f'    relationshipQuery: "{_cypher_nested(relationship_query)}",\n'
        "    validateRelationships: false,\n"
        "    maxIterations: 50})\n"
        "    yield nodeId, communityId\n"
        "    RETURN gds.util.asNode(nodeId).location_id AS location_id, "
        "gds.util.asNode(nodeId).latitude AS latitude, "
        "gds.util.asNode(nodeId).longitude AS longitude, "
        "communityId as community"
    )

    number_of_complaints_query = (
        complaint_match
        + "return distinct l.location_id as location_id, count(c) as number_of_complaints"
    )

    # Keep the session open only for the two reads.
    with driver.session() as session:
        louvain_rows = session.read_transaction(get_louvain, louvain_query)
        complaint_rows = session.read_transaction(get_complaints_by_loc, number_of_complaints_query)

    heat_points = _cluster_heat_points(louvain_rows, complaint_rows)
    if not heat_points:
        # Nothing matched the selection: leave the map as it is.
        return

    m.add_layer(Heatmap(locations=heat_points, radius=20))

In [13]:
# Load the offense names once; a later cell uses them as the options of the
# offense selector widget.
with driver.session() as session:
    offenses = session.read_transaction(get_offenses)

In [14]:
# Sort so the selector lists the offenses in a stable order (re-running this
# cell is harmless because sorting is idempotent).
offenses = sorted(offenses)

In [15]:
# date properties
sdate = date(2019, 1, 1)    # first selectable day (inclusive)
edate = date(2021, 3, 31)   # end of the range (exclusive: the last selectable day is edate - 1 day)

# ISO-formatted ('YYYY-MM-DD') day strings, one per day in [sdate, edate).
# Built in a single expression so `date_range` is only ever a list of str, and
# with the canonical upper-case 'D' (calendar day) frequency alias.
date_range = (
    pd.date_range(sdate, edate - timedelta(days=1), freq='D')
    .strftime('%Y-%m-%d')
    .tolist()
)

In [16]:
# function called by click event
def on_button_clicked(b):
    """Redraw the clusters for the current widget selection.

    Args:
        b: The clicked button (unused; required by the ``on_click`` signature).
    """
    selected_offenses = list(offenses_widget.value)
    selected_dates = date_selection.value

    # Only the echo of the selection and the map reset run inside the output
    # widget's context; the cluster computation runs outside of it.
    with output:
        print(selected_offenses)
        print(selected_dates)

        m.clear_layers()
        m.add_layer(basemaps.CartoDB.Positron)

    calculate_clusters2(selected_dates, selected_offenses)

In [17]:
# analysis widget components
# This cell builds the input widgets, wraps them in map controls, attaches the
# controls to the map `m`, and wires the button to `on_button_clicked`.

# widget to select multiple offenses for the analysis (first offense preselected)
offenses_widget = widgets.SelectMultiple(
    options=offenses,
    value=[offenses[0]],
    description='Offenses',
    disabled=False,
    layout={'width': '500px'}
)

# widget to generate the clusters in heat map based on the offenses and dates selected
button = widgets.Button(
    description='Show Clusters',
    disabled=False,
    button_style='', # 'success', 'info', 'warning', 'danger' or ''
    tooltip='Show clusters',
    icon='network-wired' # (FontAwesome names without the `fa-` prefix)
)

# date select widget to select a range of dates for the analysis;
# the initial selection spans the whole `date_range`
date_selection = widgets.SelectionRangeSlider(
    options=date_range,
    index=(0, len(date_range)-1),
    description='Date Range',
    disabled=False,
    layout={'width': '700px'}
)

# play control; `max` is only an initial value here, the `callback` cell further
# down overwrites it with the number of days in the selected range
play = widgets.Play(
    value=0,
    min=0,
    max=100,
    interval=500,
    step=1,
    description="Press play",
    disabled=False
)
# slider linked on the front end to the play counter
# NOTE(review): `slider` is not placed in any container or control in the
# visible cells, so it is never shown — confirm whether it is still needed.
slider = widgets.IntSlider()
widgets.jslink((play, 'value'), (slider, 'value'))
# play button and date range slider side by side
hbox = widgets.HBox([play,date_selection])

# map controls wrapping the widgets above
offenses_control = WidgetControl(widget=offenses_widget, position='bottomleft', display="flex", width=500)
# NOTE(review): `date_control` is created but not added to the map (see the
# commented-out line below); `date_selection` is shown through `hbox` instead.
date_control = WidgetControl(widget=date_selection, position='bottomright', display="flex", width=700, min_width=500, max_width=1000)
button_control = WidgetControl(widget=button, position='topright')
slider_control = WidgetControl(widget=hbox, position='bottomright')        
m.add_control(offenses_control)
# m.add_control(date_control)
m.add_control(button_control)
m.add_control(slider_control)

# button click event
button.on_click(on_button_clicked)

In [18]:
# Display the map together with the controls attached to it above.
m

Map(center=[40.7128, -74.006], controls=(ZoomControl(options=['position', 'zoom_in_text', 'zoom_in_title', 'zo…

In [23]:
def callback(change):
    """Draw the clusters of a single day when playback is switched on.

    Registered below as an observer of the Play widget's ``_playing`` trait, so
    it runs when playback starts or stops, not on every play step.

    Args:
        change: Trait-change dict; ``change['new']`` is the new ``_playing`` value.
    """
    if not change['new']:
        # Playback was stopped: nothing to redraw.
        return

    first_day, last_day = date_selection.value
    dates = pd.date_range(first_day, last_day, freq='D').strftime('%Y-%m-%d').tolist()

    # One play step per day of the selected range.
    play.max = len(dates) - 1

    m.clear_layers()
    m.add_layer(basemaps.CartoDB.Positron)

    # Guard the index in case the counter is still beyond the new maximum.
    current_day = dates[min(play.value, len(dates) - 1)]
    calculate_clusters2((current_day, current_day), list(offenses_widget.value))

# NOTE(review): `_playing` is a private trait of the Play widget and may change
# between ipywidgets versions — confirm it exists in the installed version.
play.observe(callback, names=['_playing'])