In [None]:
# pip install geopandas dash
# pip install jupyter-dash

In [None]:
import pandas as pd
# import geopandas as gpd
# import cudf
import numpy as np

import plotly.express as px
import plotly.graph_objects as go
from jupyter_dash import JupyterDash
import dash
import dash_core_components as dcc
import dash_html_components as html
from dash.dependencies import Input, Output, State
import json
from geopy.geocoders import Nominatim
from crime_clustering import CrimeCluster
from geopy.distance import great_circle
import pickle5 as pickle
from sklearn.linear_model import LinearRegression
import base64
from zillowscraper import address_to_price_and_image
import requests

In [None]:
# Read the Mapbox access token from a local file (kept out of the notebook itself)
# and register it with Plotly Express. The context manager closes the file handle
# deterministically, and strip() drops a trailing newline that editors commonly add.
with open('tokens/mapbox-token.txt') as token_file:
    mapbox_token = token_file.read().strip()
px.set_mapbox_access_token(mapbox_token)

In [None]:
# Load the cleaned crime records and keep only rows whose 'Year' is before 2021.
df = (
    pd.read_csv('data/crime-clean.csv')
    .loc[lambda frame: frame['Year'] < 2021]
)

In [None]:
# Preview 15 randomly selected rows of the filtered crime data.
df.sample(15)

In [None]:
# Summarise column dtypes, non-null counts and memory usage of the crime data.
df.info()

In [None]:
# Label written into the 'Crime Type' column for rows aggregated over every crime
# type; it is also the first entry offered in the crime-type dropdown.
all_crimes = 'ALL'

In [None]:
def crime_score(location_column):
    """Average 'Crime Score' and 'CSperCapita' per location, overall and per crime type.

    Parameters
    ----------
    location_column : str
        Name of the column in the module-level ``df`` to group by
        (the notebook uses 'Neighborhood' and 'Zip Code').

    Returns
    -------
    pandas.DataFrame
        The per-location means across all crimes (with 'Crime Type' set to
        ``all_crimes``) stacked on top of the per-location, per-crime-type means.
        The original row indices of both parts are kept, so index values repeat.
    """
    mean_of_scores = {'Crime Score': 'mean', 'CSperCapita': 'mean'}

    # Means over every crime in a location, tagged with the "all crimes" label.
    overall = df.groupby([location_column], as_index=False).agg(mean_of_scores)
    overall['Crime Type'] = all_crimes

    # Means for each (crime type, location) pair.
    per_type = df.groupby(['Crime Type', location_column], as_index=False).agg(mean_of_scores)

    return pd.concat([overall, per_type], axis=0)

In [None]:
# Mean crime scores per neighborhood (overall and per crime type); this frame
# feeds the choropleth when 'Neighborhood' is selected.
df_neigh = crime_score('Neighborhood')
df_neigh.head()

In [None]:
# Mean crime scores per zip code (overall and per crime type); this frame
# feeds the choropleth when 'Zip Code' is selected.
df_zip = crime_score('Zip Code')
df_zip.head()

In [None]:
# df_neigh = df.groupby(['Neighborhood'], as_index=False).agg({'Crime Score': 'mean', 'CSperCapita': 'mean'})
# df_neigh.head()

In [None]:
# df_zip = df.groupby(['Zip Code'], as_index=False).agg({'Crime Score': 'mean', 'CSperCapita': 'mean'})
# df_zip.head()

In [None]:
def agg_crime(location_column=None):
    """Count crimes per year, overall and per crime type, optionally per location.

    Parameters
    ----------
    location_column : str, optional
        Column of the module-level ``df`` to group by in addition to 'Year'.
        When omitted, counts are computed for the whole data set.

    Returns
    -------
    pandas.DataFrame
        Yearly counts across all crimes (with 'Crime Type' set to ``all_crimes``)
        stacked on top of the yearly counts per crime type. The count is the
        number of non-null 'Latitude' values and is stored in a 'Count' column.
    """
    year_keys = ['Year'] if location_column is None else ['Year', location_column]

    def count_rows(group_columns):
        # Non-null 'Latitude' entries per group, exposed as 'Count'.
        counted = df.groupby(group_columns, as_index=False).agg({'Latitude': 'count'})
        return counted.rename(columns={'Latitude': 'Count'})

    totals = count_rows(year_keys)
    totals['Crime Type'] = all_crimes

    by_type = count_rows(['Crime Type'] + year_keys)

    return pd.concat([totals, by_type], axis=0)

In [None]:
# Yearly crime counts for the whole data set (no location grouping); used for the
# city-wide trend chart.
df_crime_chicago = agg_crime()
df_crime_chicago.head()

In [None]:
# Yearly crime counts per neighborhood; used for the trend chart of a clicked neighborhood.
df_crime_neigh = agg_crime('Neighborhood')
df_crime_neigh.head()

In [None]:
# Yearly crime counts per zip code; used for the trend chart of a clicked zip code.
df_crime_zip = agg_crime('Zip Code')
df_crime_zip.head()

In [None]:
# Start the GeoJSON lookup with the neighborhood boundaries; the choropleth and
# the clustering helper both read them from this dict.
with open('data/geo/Neighborhoods.geojson') as neigh_file:
    neigh_geojson = json.load(neigh_file)
geodict = {'json_neigh': neigh_geojson}

In [None]:
# Add the zip-code boundaries to the GeoJSON lookup created in the previous cell.
with open('data/geo/ZIP.geojson') as zip_file:
    zip_geojson = json.load(zip_file)
geodict['json_zip'] = zip_geojson

In [None]:
# Clustering helper from the project-local crime_clustering module, built from the
# crime records and both GeoJSON boundary sets. The chart builders below call its
# GMM and GMM_Address methods.
cCluster = CrimeCluster(df, geodict['json_neigh'], geodict['json_zip'])

In [None]:
# Dropdown choices: the "all crimes" label followed by every distinct crime type
# in order of first appearance in the data.
crime_types = [all_crimes, *df['Crime Type'].unique()]

In [None]:
# Load the precomputed city-wide cluster results from disk.
# NOTE(review): unpickling can execute arbitrary code embedded in the file, so
# this must only ever be pointed at a trusted, locally generated pickle.
with open('data/clusters/chicago/crime_types_clusters.pickle', 'rb') as cluster_file:
    chicago_cluster_data = pickle.load(cluster_file)

In [None]:
# Tabular view of the precomputed clusters; charts_chicago looks rows up by
# 'Crime Type' and reads their 'centers' and 'covariances' entries.
df_chicago_clusters = pd.DataFrame.from_dict(chicago_cluster_data)

In [None]:
# crime_types

In [None]:
# External CSS for the dashboard; presumably the source of the 'row',
# 'six columns', 'u-full-width' and 'button-primary' classes used in the layout.
external_stylesheets = ['https://codepen.io/chriddyp/pen/bWLwgP.css']
# css documentation at https://codepen.io/chriddyp/pen/bWLwgP

# Standalone Dash application; the JupyterDash variant below is kept as an
# alternative for running inside a notebook.
app = dash.Dash(__name__, external_stylesheets=external_stylesheets)
# app = JupyterDash(__name__, external_stylesheets=external_stylesheets)

In [None]:
def src_image(src, URL=False, timeout=10):
    """Return an image as a base64 ``data:`` URI suitable for ``html.Img(src=...)``.

    Parameters
    ----------
    src : str
        Path of a local image file, or an image URL when ``URL`` is true.
    URL : bool, optional
        When true, ``src`` is downloaded with ``requests``; otherwise it is read
        from disk.
    timeout : float, optional
        Seconds to wait for the download before ``requests`` raises a timeout
        error. Only used when ``URL`` is true. Without a timeout a stalled
        server would block the calling Dash callback indefinitely.

    Returns
    -------
    str
        ``'data:image/png;base64,<payload>'``. The media type is always declared
        as PNG, whatever the real format of the bytes.
    """
    if URL:
        raw_bytes = requests.get(src, timeout=timeout).content
    else:
        # Context manager so the file handle is closed right after reading.
        with open(src, 'rb') as image_file:
            raw_bytes = image_file.read()
    encoded = base64.b64encode(raw_bytes)
    return 'data:image/png;base64,{}'.format(encoded.decode())

In [None]:
# Address search form: an address box and a radius (miles) box side by side,
# with a submit button on the row below. The component ids are referenced by
# the update_crime_chart callback.
form_address = html.Div([
    html.Div(
        [
            html.Div(
                [
                    html.Label("Address", htmlFor="input-address", id="label-address"),
                    dcc.Input(id="input-address", type="search", placeholder="Address", className='u-full-width'),
                ],
                className='six columns',
            ),
            html.Div(
                [
                    html.Label("Miles", htmlFor='input-miles', id='label-miles'),
                    dcc.Input(id='input-miles', type='number', value=1, className='u-full-width'),
                ],
                className='six columns',
            ),
        ],
        className='row',
    ),
    html.Div(
        [
            html.Button('Submit', id='btn-submit-address', className='button-primary', style={'margin-top': '10px'}),
        ],
        className='row',
    ),
])

In [None]:
# Chart options: a crime-type dropdown and a Neighborhood / Zip Code selector.
# Both feed the map callback and the chart callback.
form_chart_options = html.Div(children=[
    html.Div(
        className='row',
        children=[
            html.Div(
                className='six columns',
                children=[
                    html.Label("Crime Type", htmlFor='crime-types', id='label-crime-types'),
                    dcc.Dropdown(id='crime-types', className='u-full-width',
                        options = [
                            {'label': i, 'value': i} for i in crime_types
                        ],
                        # Default to the shared "all crimes" label rather than a
                        # second hard-coded copy of the string.
                        value = all_crimes,
                        # Clearing the dropdown would send None to the callbacks,
                        # and the chart builders cannot look up a None crime type.
                        clearable = False)
                ]
            )
        ]
    ),
    html.Div(
        className='row',
        children = [
            dcc.RadioItems(id='location-select',
                options = [{'label': i, 'value': i} for i in ['Neighborhood', 'Zip Code']],
                value = 'Neighborhood',
                labelStyle = {'display': 'inline-block'})
        ]
    )
])

In [None]:
# Zillow panel: a fixed-height slot for the house image and a slot for the price
# text. Both start empty and are filled by the update_crime_chart callback.
html_zillow = html.Div([
    html.Div([], id='house-image', style={'height': 211}),
    html.Div([], id='price'),
])

In [None]:
# Page layout: a title row, then two half-width columns. The left column holds
# the forms and the choropleth map; the right column holds the Zillow panel,
# the trend chart and the cluster map. A final row of hidden divs carries
# callback state between invocations.
app.layout = html.Div([
    html.Div(
        [html.H1("Chicago Crime & Real Estate", style={'text-align': 'center'})],
        className="row",
    ),
    html.Div(
        [
            html.Div(
                [
                    form_address,
                    html.Br(),
                    form_chart_options,
                    dcc.Graph(id='chicago-map'),
                ],
                className="six columns",
            ),
            html.Div(
                [
                    html_zillow,
                    dcc.Graph(id='crime-chart'),
                    dcc.Graph(id='crime-cluster-map'),
                ],
                className="six columns",
            ),
        ],
        className="row",
    ),
    html.Div(
        [
            html.Div(id='crime-clusters', style={'display': 'none'}),
            html.Div(id='callback-data', style={'display': 'none'}),
        ],
        id='meta-data',
        className='row',
    ),
])

In [None]:
@app.callback(
    Output('chicago-map', 'figure'),
    [Input('location-select', 'value'),
    Input('crime-types', 'value')]
)
def update_chicago_map(value, crime_type):
    """Redraw the city choropleth for the chosen location granularity and crime type.

    Parameters
    ----------
    value : str
        Selection of the 'location-select' radio buttons. 'Neighborhood' picks
        the neighborhood data; anything else picks the zip-code data.
    crime_type : str
        Selection of the 'crime-types' dropdown, matched against 'Crime Type'.

    Returns
    -------
    plotly figure
        Choropleth coloured by 'Crime Score', centred on Chicago.
    """
    if value == 'Neighborhood':
        scores, locations = df_neigh, 'Neighborhood'
        geojson, featureidkey = geodict['json_neigh'], 'properties.pri_neigh'
    else:
        scores, locations = df_zip, 'Zip Code'
        geojson, featureidkey = geodict['json_zip'], 'properties.zip'

    # NOTE(review): `labels` maps column names to display names, and none of the
    # columns produced by crime_score is called 'Weighted Score', so this mapping
    # looks inert. Confirm the intended column/label pair.
    fig = px.choropleth_mapbox(
        data_frame=scores[scores['Crime Type'] == crime_type],
        locations=locations,
        geojson=geojson,
        featureidkey=featureidkey,
        mapbox_style='carto-positron',
        color='Crime Score',
        center=dict(lat=41.84, lon=-87.6298),
        zoom=9,
        opacity=0.5,
        labels={'Weighted Score': 'CSperCapita'},
        height=800,
    )

    # Remove the default margins so the map fills its container.
    fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))

    return fig

In [None]:
def ellipses(means, covariances, n_per_cluster=None):
    """Build Mapbox line layers that draw every cluster as nested, fading ellipses.

    For each covariance matrix, 40 concentric outlines are produced, scaled from
    0 up to 39/40 of the full ellipse. Inner outlines are red and more opaque;
    outer ones shift towards yellow and fade out.

    Parameters
    ----------
    means : object with ``lon`` and ``lat`` attributes indexable by cluster number
        Cluster centres. The callers in this notebook pass a DataFrame with
        'lat' and 'lon' columns and a default integer index.
    covariances : array-like
        One 2x2 covariance matrix per cluster, indexed along the first axis.
    n_per_cluster : array-like of numbers, optional
        Per-cluster weight (the caller passes cluster sizes). Weights are
        divided by their maximum and floored at 0.25, then used to scale each
        cluster's opacity and line width. The argument itself is never
        modified. When omitted, every cluster gets weight 1.

    Returns
    -------
    list of dict
        Layer specifications for ``layout.mapbox.layers``, 40 per cluster.
    """
    covariances = np.asarray(covariances)
    n_clusters = covariances.shape[0]

    if n_per_cluster is None:
        weights = np.ones(n_clusters)
    else:
        # Normalise on a private float copy. Dividing in place would overwrite the
        # caller's data and fails outright for integer arrays and plain lists.
        weights = np.array(n_per_cluster, dtype=float)
        peak = np.max(weights) if weights.size else 0.0
        if peak > 0:
            weights = weights / peak
        else:
            # No positive counts to normalise by: fall back to equal weights.
            weights = np.ones(weights.shape)
        weights[weights < 0.25] = 0.25

    steps = 40
    theta = np.linspace(0, 2*np.pi, 20)
    ellipse_layers = []

    for cluster in range(n_clusters):
        center_x, center_y = means.lon[cluster], means.lat[cluster]
        opacity_scale = weights[cluster]

        eigenvalues, eigenvectors = np.linalg.eigh(covariances[cluster])
        direction = eigenvectors[0] / np.linalg.norm(eigenvectors[0])
        shift_angle = np.arctan2(direction[1], direction[0])
        axis_lengths = 2. * np.sqrt(2.) * np.sqrt(eigenvalues)
        a = axis_lengths[0]
        b = axis_lengths[1]

        # The full-size outline depends only on the cluster, so compute it once
        # and shrink it for each nested ring.
        radius = a * b / np.sqrt((b * np.cos(theta))**2 + (a * np.sin(theta))**2)
        offset_x = radius * np.sin(theta + shift_angle)
        offset_y = radius * np.cos(theta + shift_angle)

        for step in range(steps):
            scale_factor = step / steps
            x = center_x + offset_x * scale_factor
            y = center_y + offset_y * scale_factor
            green = 255 * scale_factor
            coords = [[lon, lat] for lon, lat in zip(x.tolist(), y.tolist())]
            ellipse_layers.append(dict(
                sourcetype='geojson',
                source={
                    "type": "Feature",
                    "geometry": {"type": "LineString", "coordinates": coords},
                },
                color='rgb(255,' + str(int(green)) + ', 0)',
                type='line',
                below='',
                opacity=(1 - scale_factor) * opacity_scale,
                line=dict(width=2 * opacity_scale),
            ))

    return ellipse_layers

In [None]:
def update_data(location, location_type, n_clicks, crime_type):
    """Bundle the current dashboard selection into a dict for the hidden state div.

    Parameters
    ----------
    location : the selected place ('Chicago', a neighborhood/zip code, or an address)
    location_type : kind of place the location refers to
    n_clicks : click count of the submit button at the time of the update
    crime_type : selected crime type

    Returns
    -------
    dict
        Keys 'n_clicks', 'Location Type', 'Location' and 'Crime Type', in that order.
    """
    field_names = ('n_clicks', 'Location Type', 'Location', 'Crime Type')
    field_values = (n_clicks, location_type, location, crime_type)
    return dict(zip(field_names, field_values))

In [None]:
def line_chart(df, title):
    """Plot yearly crime counts together with a fitted linear trend line.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Year' and 'Count' columns. It is not modified.
    title : str
        Chart title.

    Returns
    -------
    plotly figure
        Line chart with one series labelled 'Number of Crimes' and one labelled
        'Trend'. The y axis runs from 0 to 110% of the largest plotted value.
    """
    observed = df.copy()

    # Ordinary least-squares fit of count against year, evaluated at the same years.
    years = observed['Year'].values.reshape((-1, 1))
    model = LinearRegression().fit(years, observed['Count'])
    fitted = pd.DataFrame.from_dict({
        'Year': observed['Year'].to_list(),
        'Count': list(model.predict(years)),
    })

    # The 'Data' column tells the two series apart in the legend.
    observed['Data'] = 'Number of Crimes'
    fitted['Data'] = 'Trend'

    combined = pd.concat([observed, fitted], axis=0)
    combined = combined.rename(columns={'Count': 'Number of Crimes'})

    fig_chart = px.line(
        data_frame=combined,
        x='Year',
        y='Number of Crimes',
        color='Data',
        title=title,
    )

    # Anchor the y axis at zero and leave 10% headroom above the tallest point.
    tallest = combined['Number of Crimes'].max()
    fig_chart.update_layout(yaxis_range=[0, tallest * 1.1])

    return fig_chart

In [None]:
def charts_address(address, miles, crime_type):
    """Build the trend chart and cluster map for the area around a street address.

    Parameters
    ----------
    address : str
        Address passed to ``cCluster.GMM_Address``.
    miles : number
        Radius passed to ``cCluster.GMM_Address``.
    crime_type : str
        Crime type passed to ``cCluster.GMM_Address``.

    Returns
    -------
    tuple
        ``(trend figure, map figure, cluster centres as a JSON string)``.
    """
    gmm_result = cCluster.GMM_Address(address, miles, crime_type)
    # Positions in the result as used here (presumably fixed by GMM_Address;
    # verify against crime_clustering): 1 = cluster centres, 2 = covariances,
    # 3 = crime rows near the address, 4 = per-cluster counts, 5 = geocoded location.
    centers = pd.DataFrame(gmm_result[1], columns=['lat', 'lon'])
    cluster_covariances = gmm_result[2]
    cluster_sizes = gmm_result[4]
    geocoded = gmm_result[5]

    # Single marker at the address itself.
    address_point = pd.DataFrame.from_dict({
        'lat': [geocoded.latitude],
        'lon': [geocoded.longitude],
    })
    fig_map = px.scatter_mapbox(
        address_point,
        lat='lat',
        lon='lon',
        zoom=12,
        mapbox_style='carto-positron',
        size=[3],
    )

    # Overlay the cluster ellipses, weighted by cluster size.
    fig_map.layout.update(
        mapbox_layers=ellipses(centers, cluster_covariances, cluster_sizes)
    )

    # Yearly counts of the crimes returned for this address.
    nearby_crimes = gmm_result[3]
    yearly_counts = nearby_crimes.groupby(['Year'], as_index=False).agg({'Latitude': 'count'})
    yearly_counts = yearly_counts.rename(columns={'Latitude': 'Count'})

    fig_chart = line_chart(yearly_counts, 'Address: {}'.format(address))

    return fig_chart, fig_map, centers.to_json()

In [None]:
def charts_neigh_zip(location, locationSelectValue, crime_type):
    """Build the trend chart and cluster map for one neighborhood or zip code.

    Parameters
    ----------
    location : value identifying the neighborhood or zip code
    locationSelectValue : str
        'Neighborhood' selects the neighborhood counts; anything else selects
        the zip-code counts.
    crime_type : str
        Crime type to filter on.

    Returns
    -------
    tuple
        ``(trend figure, map figure, cluster centres as a JSON string)``.
    """
    if locationSelectValue == 'Neighborhood':
        yearly_counts, column = df_crime_neigh, 'Neighborhood'
    else:
        yearly_counts, column = df_crime_zip, 'Zip Code'

    # Keep only the rows for the requested crime type, then for the requested place.
    of_type = yearly_counts[yearly_counts['Crime Type'] == crime_type]
    selected_counts = of_type[of_type[column] == location]

    # Index 1 holds the cluster centres and index 2 the covariances, as used
    # below (presumably the layout returned by cCluster.GMM; verify in crime_clustering).
    gmm_result = cCluster.GMM(location, crime_type)
    centers = pd.DataFrame(gmm_result[1], columns=['lat', 'lon'])
    cluster_covariances = gmm_result[2]

    fig_map = px.scatter_mapbox(
        centers,
        lat='lat',
        lon='lon',
        zoom=12,
        mapbox_style='carto-positron',
    )
    fig_map.layout.update(mapbox_layers=ellipses(centers, cluster_covariances))

    fig_chart = line_chart(selected_counts, '{0}: {1}'.format(column, location))

    return fig_chart, fig_map, centers.to_json()

In [None]:
def charts_chicago(crime_type):
    """Build the city-wide trend chart and cluster map from precomputed clusters.

    Parameters
    ----------
    crime_type : str
        Crime type to look up in ``df_chicago_clusters`` and ``df_crime_chicago``.
        Raises ``IndexError`` when no precomputed cluster row matches it.

    Returns
    -------
    tuple
        ``(trend figure, map figure, cluster centres as a JSON string)``.
    """
    # First precomputed cluster record for this crime type.
    matching = df_chicago_clusters[df_chicago_clusters['Crime Type'] == crime_type]
    cluster_record = matching.to_dict(orient='records')[0]
    centers = pd.DataFrame(cluster_record['centers'], columns=['lat', 'lon'])
    cluster_covariances = cluster_record['covariances']

    fig_map = px.scatter_mapbox(
        centers,
        lat='lat',
        lon='lon',
        zoom=9,
        mapbox_style='carto-positron',
    )
    fig_map.layout.update(mapbox_layers=ellipses(centers, cluster_covariances))

    yearly_counts = df_crime_chicago[df_crime_chicago['Crime Type'] == crime_type]
    fig_chart = line_chart(yearly_counts, 'Chicago')

    return fig_chart, fig_map, centers.to_json()

In [None]:
@app.callback(
    [Output('crime-chart', 'figure'),
    Output('crime-cluster-map', 'figure'),
    Output('crime-clusters', 'children'),
    Output('callback-data', 'children'),
    Output('house-image', 'children'),
    Output('price', 'children')],
    [Input('chicago-map', 'clickData'),
    Input('location-select', 'value'),
    Input('btn-submit-address', 'n_clicks'),
    Input('crime-types', 'value')],
    [State('input-address', 'value'),
    State('input-miles', 'value'),
    State('callback-data', 'children'),
    State('house-image', 'children'),
    State('price', 'children')]
)
def update_crime_chart(clickData, locationSelectValue, n_clicks, crime_type, address, miles, data, img, price):
    """Refresh the trend chart, cluster map and Zillow panel when any input changes.

    The callback compares the current inputs with the state saved by the previous
    invocation (a JSON string in the hidden 'callback-data' div) to work out what
    changed. The first matching case wins:

    1. the submit click count changed: show the submitted address and look it up
       on Zillow;
    2. the crime type changed: redraw whatever was shown before with the new type;
    3. the Neighborhood / Zip Code selector changed: fall back to the city view;
    4. a map region has been clicked: show that neighborhood or zip code;
    5. otherwise: show the city view.

    Parameters
    ----------
    clickData : dict or None
        Click payload of the choropleth; the clicked location is read from
        ``clickData['points'][0]['location']``.
    locationSelectValue : str
        Current selection of the Neighborhood / Zip Code radio buttons.
    n_clicks : int or None
        Click count of the submit button.
    crime_type : str
        Current selection of the crime-type dropdown.
    address, miles :
        Current contents of the address form.
    data : str or None
        JSON state written by the previous invocation, or None on first load.
    img, price :
        Current children of the Zillow panel. They are returned unchanged unless
        an address is submitted.

    Returns
    -------
    tuple
        ``(trend figure, map figure, cluster centres JSON, state JSON, image, price)``.
    """
    if data is None:
        # First render: nothing has been stored yet, so behave as if the previous
        # view was the city view with the current inputs.
        data = update_data('Chicago', locationSelectValue, n_clicks, crime_type)
    else:
        data = json.loads(data)

    if n_clicks != data['n_clicks']:
        # Submit button was clicked.
        charts = charts_address(address, miles, crime_type)
        house_data = address_to_price_and_image(address)
        if house_data == 'INVALID ADDRESS':
            # Keep the panel height stable with an empty placeholder.
            img = html.Div(style={'height': 200})
            price = 'INVALID ADDRESS'
        elif house_data == 'HOUSE NOT FOUND':
            # The address has no Zillow listing.
            img = html.Div(style={'height': 200})
            price = 'NO ZILLOW LISTING'
        else:
            price = 'Zestimate: {}'.format(house_data[0])
            image_src = house_data[1]
            img = html.Img(src=src_image(image_src, URL=True), style={'height': 200})
        # Record the submit whatever the Zillow outcome. Otherwise the stored
        # click count stays stale and every later callback would be mistaken
        # for another submit.
        data = update_data(address, 'Address', n_clicks, crime_type)

    elif crime_type != data['Crime Type']:
        # The crime type was changed: redraw the previously shown location.
        if data['Location Type'] == 'Address':
            charts = charts_address(address, miles, crime_type)
            data = update_data(address, 'Address', n_clicks, crime_type)
        elif data['Location'] == 'Chicago':
            charts = charts_chicago(crime_type)
            data = update_data('Chicago', locationSelectValue, n_clicks, crime_type)
        else:
            location = data['Location']
            location_type = data['Location Type']
            charts = charts_neigh_zip(location, location_type, crime_type)
            data = update_data(location, location_type, n_clicks, crime_type)

    elif locationSelectValue != data['Location Type']:
        # The Neighborhood / Zip Code selector no longer matches the stored
        # location type, so the previous selection is void: show the city.
        charts = charts_chicago(crime_type)
        data = update_data('Chicago', locationSelectValue, n_clicks, crime_type)

    elif clickData is not None:
        # Either a neighborhood or zip code was clicked.
        location = clickData['points'][0]['location']
        location_type = locationSelectValue
        charts = charts_neigh_zip(location, location_type, crime_type)
        data = update_data(location, location_type, n_clicks, crime_type)

    else:
        # Nothing selected yet (including the first render): city view.
        charts = charts_chicago(crime_type)
        data = update_data('Chicago', locationSelectValue, n_clicks, crime_type)

    return (*charts, json.dumps(data), img, price)

In [None]:
# Start the Dash server when this code runs as the main module; the call blocks
# until the server is stopped.
if __name__ == '__main__':
    app.run_server()

In [None]:
# app.run_server(debug=True)

In [None]:
# app.run_server(mode="inline")

In [None]:
# app.run_server(mode="jupyterlab")

In [None]:
# app.run_server()

In [None]:
# Run to terminate the server if necessary:
# app._terminate_server_for_port("localhost", 8050)