In [31]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import altair as alt
import folium
import os
import folium.plugins as plugins
import geopandas as gpd

In [32]:
# Enable Altair's 'notebook' renderer so charts display inline.
alt.renderers.enable('notebook')
# Turn off Altair's maximum-rows check on chart datasets.
alt.data_transformers.disable_max_rows()

# Optional (disabled): set up Altair to save chart data to external JSON files instead of
# embedding it. This breaks Vega rendering but is needed if the notebook gets too large.

#def json_dir(data, data_dir = 'altairdata'):
    #os.makedirs(data_dir, exist_ok = True)
    #return alt.pipe(data, alt.to_json(filename = data_dir + '/{prefix}-{hash}.{extension}'))
#alt.data_transformers.register('json_dir', json_dir)
#alt.data_transformers.enable('json_dir', data_dir = 'mydata')

DataTransformerRegistry.enable('default')

In [33]:
# Load the three inputs: the complaints table, the NYPD report extract and the
# police-precinct boundaries.
# low_memory=False makes pandas infer every column's dtype from the whole file
# rather than chunk by chunk; the chunked default can leave columns with mixed
# types and triggers a DtypeWarning.
df = pd.read_csv('Complaints.csv', low_memory = False)
arrestReports = pd.read_csv('NYPD_Complaint_Data_Current__Year_To_Date_.csv', low_memory = False)
precinctMap = gpd.read_file('PolicePrecincts.geojson')

# Label missing values explicitly so they are kept as their own category.
df['complainant_ethnicity'] = df['complainant_ethnicity'].fillna('Unknown')
df['command_at_incident'] = df['command_at_incident'].fillna('Unknown')

# Recode precinct 1000 as precinct 22.
# NOTE(review): the reason for this mapping is not visible here — confirm and document it.
df = df.replace({'precinct': {1000:22}})

  interactivity=interactivity, compiler=compiler, result=result)


In [34]:
# Keep only the victim demographics and the coordinates of each report.
reportColumns = ['VIC_RACE', 'VIC_AGE_GROUP', 'Latitude', 'Longitude']
arrestReports = arrestReports[reportColumns]

# Discard reports whose victim race is recorded as 'UNKNOWN'; reset_index() keeps
# the previous row labels in an 'index' column.
raceIsKnown = arrestReports['VIC_RACE'] != 'UNKNOWN'
arrestReports = arrestReports[raceIsKnown].reset_index()

# Turn each (Longitude, Latitude) pair into a point geometry.
reportPoints = gpd.points_from_xy(arrestReports.Longitude, arrestReports.Latitude)
geoArrestReports = gpd.GeoDataFrame(arrestReports, geometry = reportPoints)

In [35]:
# Label the report points with the precinct layer's CRS; this only sets the
# attribute, it does not reproject any coordinates.
# NOTE(review): assumes Latitude/Longitude are already expressed in that CRS — confirm.
geoArrestReports.crs = precinctMap.crs

# Inner spatial join: keep only the reports matched to a precinct polygon and
# attach that polygon's attributes (including 'precinct') to each report.
geoArrestReports = gpd.sjoin(left_df = geoArrestReports, right_df = precinctMap, how = 'inner')

In [36]:
# One row per precinct (ascending), together with the number of complaints filed there.
precincts = pd.DataFrame({'precinct': np.sort(df['precinct'].unique())})
complaintCounts = df.groupby(['precinct']).size().to_frame(name = 'Number of Complaints')
precincts = precincts.join(complaintCounts, on = 'precinct')

# `keys` (sorted precinct ids) and `values` are paired purely by position when the
# uniformed-officer lookup is built from them.
# NOTE(review): zip() silently truncates, so a change in the set of precincts would
# misalign these numbers without any error — verify the lengths match.
keys = precincts['precinct'].unique()
values = [
    190, 141, 164, 142, 178, 162, 190, 307, 141, 294, 207,
    149, 120, 176, 156, 210, 134, 167, 142, 205, 191, 201,
    284, 186, 221, 293, 358, 174, 306, 280, 252, 193, 133,
    282, 202, 167, 154, 155, 138, 333, 138, 161, 303, 216,
    179, 275, 399, 139, 235, 198, 247, 197, 209, 221, 155,
    185, 142, 145, 206, 209, 254, 182, 310, 225, 197, 148,
    292, 211, 153, 144, 236, 261, 234, 311, 196, 195, 144,
]

# Board dispositions that are rolled up into a single "Substantiated" count.
substantiatedCols = [
    'Substantiated (Charges)',
    'Substantiated (Command Discipline A)',
    'Substantiated (Formalized Training)',
    'Substantiated (Command Lvl Instructions)',
    'Substantiated (Instructions)',
    'Substantiated (Command Discipline)',
    'Substantiated (Command Discipline B)',
    'Substantiated (No Recommendations)',
    'Substantiated (MOS Unidentified)',
]

In [38]:
# Lookup table from precinct id to its uniformed-officer count (paired by position).
UniformedDict = {precinctId: headcount for precinctId, headcount in zip(keys, values)}


def amtUniformed(precinct):
    """Return the uniformed-officer count stored for `precinct`.

    Raises:
        KeyError: if `precinct` has no entry in `UniformedDict`.
    """
    return UniformedDict[precinct]

In [39]:
# Look up each precinct's uniformed-officer count, then normalise the complaint
# count by it.
precincts['Uniformed Officers'] = precincts['precinct'].apply(amtUniformed)
precincts['Complaints per Officer'] = precincts['Number of Complaints'].div(precincts['Uniformed Officers'])

In [40]:
# NOTE: this cell reassigns `precincts` in place (joins + str cast), so it is meant
# to run once after the cells above; re-running it on its own will not work.

# --- Share of reports per precinct whose victim is Black or Hispanic ---------------
# Counting non-null 'Latitude' values yields one count per (precinct, VIC_RACE).
minoritydf = pd.pivot_table(geoArrestReports,values = ['Latitude'], index = ['precinct'], columns = ['VIC_RACE'], aggfunc = 'count')
minoritydf['total'] = minoritydf.sum(axis = 1)
# NOTE(review): only these three VIC_RACE values count as "minority"; every other
# category contributes to 'total' only — confirm this is the intended definition.
minoritydf['minority total'] = minoritydf['Latitude'][['BLACK','BLACK HISPANIC','WHITE HISPANIC']].sum(axis = 1)
minoritydf = minoritydf.drop(['Latitude'], axis = 1)
minoritydf['ratio'] = minoritydf['minority total']/minoritydf['total']
minoritydf = minoritydf.reset_index()

# --- Complaint counts per precinct and board disposition ---------------------------
outdf = pd.pivot_table(df,values = ['first_name'],index = ['precinct'],columns = ['board_disposition'],aggfunc = 'count')['first_name']
# Collapse every "Substantiated (...)" variant into one column. `columns=` is used
# because passing the axis positionally to drop() is rejected by pandas >= 2.0.
outdf = outdf.assign(Substantiated = outdf[substantiatedCols].sum(axis = 1)).drop(columns = substantiatedCols)

# --- First and last year a complaint was received, per precinct --------------------
yeardf = pd.pivot_table(df,values = ['year_received'],index = ['precinct'],aggfunc = ['min','max'])
# `yeardf` has two-level columns, and pandas >= 2.0 refuses to join frames whose
# column levels differ, so a flat copy is joined instead. The order (min, then max)
# is preserved because the map cell reads these two columns by position.
yearRange = pd.DataFrame({
    'First Year Received': yeardf[('min', 'year_received')],
    'Last Year Received': yeardf[('max', 'year_received')],
})

# --- Complaints per officer within each precinct -----------------------------------
# Sorted ascending by complaint count ('first_name' holds the count after aggregation).
uniqueIDdf = pd.pivot_table(df,values = ['first_name','year_received'],
                            index = ['precinct','unique_mos_id'],
                            aggfunc ={'first_name':'count','year_received':'max'}).sort_values(by = 'first_name')

precincts = precincts.join(outdf, on = 'precinct', how = 'inner')
precincts = precincts.join(yearRange, on = 'precinct', how = 'inner')

# The precinct id is compared against str(...) values when the map is built.
precincts['precinct'] = precincts['precinct'].astype(str)



In [41]:
# Build one linked pair of Altair charts per precinct, keyed by precinct id:
# a scatter of officers (complaints vs. share substantiated) next to a histogram of
# complaint years that is filtered to the officer selected in the scatter.
descriptionDict = {}

for i in keys:
    source = df.loc[df['precinct'] == i]

    # Every disposition other than these two is rolled up into "Substantiated".
    # A loop-local name is used so the notebook-level `substantiatedCols` list is
    # not overwritten (which would break re-running the earlier pivot cell).
    dispositions = source['board_disposition'].unique()
    rolledUpCols = list(dispositions[(dispositions != 'Exonerated') & (dispositions != 'Unsubstantiated')])

    # Complaint counts per officer and disposition within this precinct.
    precinctdf = pd.pivot_table(source,
              values = ['first_name'],
              index = ['unique_mos_id'],
              columns = ['board_disposition'],aggfunc = 'count')['first_name']
    # `columns=` keyword: a positional axis argument to drop() fails on pandas >= 2.0.
    precinctdf = precinctdf.assign(Substantiated = precinctdf[rolledUpCols].sum(axis = 1)).drop(columns = rolledUpCols)

    # A precinct with no complaints of one of these kinds has no such column;
    # add it as zeros so the totals below do not raise KeyError.
    for requiredCol in ('Exonerated', 'Unsubstantiated'):
        if requiredCol not in precinctdf.columns:
            precinctdf[requiredCol] = 0

    precinctdf = precinctdf.fillna(0)
    precinctdf['total'] = precinctdf['Exonerated'] + precinctdf['Unsubstantiated'] + precinctdf['Substantiated']
    precinctdf['percent'] = precinctdf['Substantiated']/precinctdf['total']
    precinctdf = precinctdf.reset_index()

    # NOTE(review): this merges against the full `df`, not the precinct slice above,
    # so the chart data holds these officers' complaints from every precinct while
    # 'total'/'percent' are precinct-specific — confirm that is intended.
    source = df.merge(precinctdf,on = 'unique_mos_id', how = 'inner')

    # Clicking a point selects one officer; with nothing selected all rows pass.
    selector = alt.selection_single(empty = 'all', fields = ['unique_mos_id'])

    base = alt.Chart(source).properties(
        width = 300,
        height = 350
    ).add_selection(selector)

    points = base.mark_point(filled = True, size = 20).encode(
        x = alt.X('total:Q', axis = alt.Axis(title = 'Total number of complaints at precinct')),
        y = alt.Y('percent:Q', axis = alt.Axis(title = 'Percentage of complaints that were substantiated')),
        tooltip = ['first_name','last_name','Substantiated','Exonerated','Unsubstantiated'],
    )

    hists = base.mark_bar(opacity = 0.5, thickness = 100).encode(
        x = alt.X('year_received:O', axis = alt.Axis(title = 'Year complaints were received')),
        y = alt.Y('count(year_received)', axis = alt.Axis(title = 'Number of complaints')),
        color = 'board_disposition'
    ).transform_filter(
        selector
    )

    # Scatter on the left, histogram on the right.
    descriptionDict[i] = points|hists

In [44]:
# Choropleth of the per-precinct minority ratio with two clickable overlays:
#   'Graphs'       - popup with the precinct's Vega-Lite charts from descriptionDict
#   'Descriptions' - popup with summary figures for the precinct (hidden by default)
choroplethGraph = folium.Map([40.719435, -74.001930], max_zoom = 12)
precinctLayer = folium.FeatureGroup(name = 'Graphs')
descriptionLayer = folium.FeatureGroup(name = 'Descriptions', show = False)
# Fully transparent style so the description polygons do not hide the choropleth.
transparent = {'fillColor': '#00000000', 'color': '#00000000'}
folium.Choropleth(
    geo_data = precinctMap,
    name = 'Minority Ratio',
    data = minoritydf,
    columns = ['precinct', 'ratio'],
    key_on = 'feature.properties.precinct',
    fill_color = 'BuPu',
    fill_opacity = 0.7,
    line_opacity = 0.2,
    legend_name = 'Minority Ratio').add_to(choroplethGraph)

for i in keys:
    # The precinct polygon is filtered once and reused for both overlays.
    precinctShape = precinctMap.loc[precinctMap['precinct'] == str(i)]

    # Overlay 1: polygon whose popup embeds the precinct's charts.
    gsP = folium.GeoJson(precinctShape)
    folium.Popup(html = str(i), max_width = 800, sticky = True).add_child(
        folium.VegaLite(descriptionDict[i], width = 800, height = 400)).add_to(gsP)
    precinctLayer.add_child(gsP)

    # Overlay 2: invisible polygon whose popup lists summary figures.
    datarow = precincts.loc[precincts['precinct'] == str(i)].reset_index()
    # Complaint count per officer in this precinct ('first_name' holds the count).
    officerCounts = uniqueIDdf.loc[i]['first_name']
    gs = folium.GeoJson(precinctShape, style_function = lambda x: transparent)
    precinctlabel = i
    officerslabel = datarow['Uniformed Officers'][0]
    complaintslabel = datarow['Number of Complaints'][0]
    substantiatedlabel = datarow['Substantiated'][0]
    officersreceivelabel = officerCounts.size
    # Columns 7 and 8 of `precincts` are the first and last complaint year (they
    # are the last two columns joined on); they are read by position.
    yearmin = datarow[precincts.columns[7]][0]
    yearmax = datarow[precincts.columns[8]][0]
    # max() rather than "last row" so the result does not depend on sort order.
    maxcomplaints = officerCounts.max()
    popuphtml = """
            <html>
                <body>
                    <h1>%s</h1>
                    <p>Number of Uniformed Officers: %s</p>
                    <p>Number of Complaints: %s</p>
                    <p>Number of Substantiated Complaints: %s</p>
                    <p>Number of Officers who received complaints: %s</p>
                    <p>Years of Complaints: %s - %s</p>
                    <p>Max complaints for a Single Officer: %s</p>
                </body>
            </html>"""%(precinctlabel, str(officerslabel), str(complaintslabel), str(substantiatedlabel), 
                        str(officersreceivelabel), str(yearmin), str(yearmax), str(maxcomplaints))
    folium.Popup(popuphtml,max_width = 250, sticky = True).add_to(gs)
    gs.add_to(descriptionLayer)

choroplethGraph.add_child(precinctLayer)
choroplethGraph.add_child(descriptionLayer)
folium.LayerControl().add_to(choroplethGraph)

<folium.map.LayerControl at 0x294a2e76b88>

In [45]:
# Write the finished map to 'ChoroplethGraph.html' (path is relative to the working directory).
choroplethGraph.save('ChoroplethGraph.html')