In [96]:
# importing altair and pandas for use in visualizations.
import altair as alt
import pandas as pd

# Read the crime CSV directly out of its zip archive and preview the first rows.
# The read fails if the macOS metadata folder is still inside the zip,
# so the archive must contain only the CSV itself.
crime = pd.read_csv(
    "crime.csv.zip",
    compression="zip",
    encoding="ISO-8859-1",
)
crime.head()

Unnamed: 0,INCIDENT_NUMBER,OFFENSE_CODE,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,DISTRICT,REPORTING_AREA,SHOOTING,OCCURRED_ON_DATE,YEAR,MONTH,DAY_OF_WEEK,HOUR,UCR_PART,STREET,Lat,Long,Location
0,I182070945,619,Larceny,LARCENY ALL OTHERS,D14,808,,2018-09-02 13:00:00,2018,9,Sunday,13,Part One,LINCOLN ST,42.357791,-71.139371,"(42.35779134, -71.13937053)"
1,I182070943,1402,Vandalism,VANDALISM,C11,347,,2018-08-21 00:00:00,2018,8,Tuesday,0,Part Two,HECLA ST,42.306821,-71.0603,"(42.30682138, -71.06030035)"
2,I182070941,3410,Towed,TOWED MOTOR VEHICLE,D4,151,,2018-09-03 19:27:00,2018,9,Monday,19,Part Three,CAZENOVE ST,42.346589,-71.072429,"(42.34658879, -71.07242943)"
3,I182070940,3114,Investigate Property,INVESTIGATE PROPERTY,D4,272,,2018-09-03 21:16:00,2018,9,Monday,21,Part Three,NEWCOMB ST,42.334182,-71.078664,"(42.33418175, -71.07866441)"
4,I182070938,3114,Investigate Property,INVESTIGATE PROPERTY,B3,421,,2018-09-03 21:05:00,2018,9,Monday,21,Part Three,DELHI ST,42.275365,-71.090361,"(42.27536542, -71.09036101)"


In [97]:
# Build the unfiltered per-street summary:
#   COUNT        - number of incidents for each (STREET, OFFENSE_CODE_GROUP) pair
#   STREET_COUNT - sum of COUNT over every offense group on that street,
#                  repeated on each row belonging to the street
group_crime = (
    crime.groupby(["STREET", "OFFENSE_CODE_GROUP"])
    .size()
    .reset_index(name="COUNT")
    .assign(
        STREET_COUNT=lambda counts: counts.groupby("STREET")["COUNT"].transform("sum")
    )
)
group_crime

Unnamed: 0,STREET,OFFENSE_CODE_GROUP,COUNT,STREET_COUNT
0,ALBANY ST,Aggravated Assault,2,42
1,ALBANY ST,Drug Violation,9,42
2,ALBANY ST,Evading Fare,1,42
3,ALBANY ST,Investigate Person,2,42
4,ALBANY ST,Larceny,3,42
...,...,...,...,...
52813,ZEIGLER ST,Verbal Disputes,13,190
52814,ZEIGLER ST,Violations,2,190
52815,ZEIGLER ST,Warrant Arrests,9,190
52816,ZELLER ST,Motor Vehicle Accident Response,1,2


In [98]:
# Shrink the dataset so Altair can visualize it:
#   1. keep only streets whose total offense count is at least 700 (STREET_COUNT >= 700);
#   2. keep only the five offense groups of interest:
#      Aggravated Assault, Homicide, Robbery, Larceny and Evading Fare.
# reset_index() (without drop=True) keeps the original group_crime row labels
# in an "index" column.
filtered_group_crime = group_crime.loc[group_crime["STREET_COUNT"] >= 700].reset_index()

# isin() replaces the five chained equality tests; the selected rows are the same.
filtered_group_crime = filtered_group_crime[
    filtered_group_crime["OFFENSE_CODE_GROUP"].isin(
        ["Aggravated Assault", "Homicide", "Robbery", "Larceny", "Evading Fare"]
    )
]

filtered_group_crime

Unnamed: 0,index,STREET,OFFENSE_CODE_GROUP,COUNT,STREET_COUNT
0,283,ADAMS ST,Aggravated Assault,35,2042
12,295,ADAMS ST,Evading Fare,2,2042
19,302,ADAMS ST,Homicide,1,2042
23,306,ADAMS ST,Larceny,85,2042
45,328,ADAMS ST,Robbery,31,2042
...,...,...,...,...,...
2700,51362,WILLIAM T MORRISSEY BLVD,Larceny,153,774
2718,51380,WILLIAM T MORRISSEY BLVD,Robbery,10,774
2725,51759,WINTER ST,Aggravated Assault,23,725
2739,51773,WINTER ST,Larceny,212,725


In [99]:
# Lift Altair's row limit so the whole filtered frame can be embedded in the chart.
alt.data_transformers.disable_max_rows()

# Legend-bound selection: clicking an offense group in the legend keeps its colour
# and turns every other segment light grey.
# NOTE(review): selection_single / add_selection are deprecated in Altair 5
# (selection_point / add_params replace them); kept so the cell still runs on Altair 4.
selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")

# One horizontal bar per street, one coloured segment per offense group.
# x uses the per-offense COUNT: bar marks with a colour field are stacked, so plotting
# STREET_COUNT (which repeats the street total on every row) would add the total once
# per offense group and inflate each bar. STREET_COUNT stays available in the tooltip.
bar_w_outliers = (
    alt.Chart(filtered_group_crime)
    .mark_bar()
    .encode(
        y="STREET:N",
        x="COUNT:Q",
        color=alt.condition(selection, "OFFENSE_CODE_GROUP", alt.value("lightgrey")),
        tooltip=["STREET", "STREET_COUNT", "OFFENSE_CODE_GROUP", "COUNT"],
    )
    .add_selection(selection)
    .properties(title="Bar Plot of 5 Major Crime Types on a Street-by-Street Level")
)

  selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")
  bar_w_outliers = alt.Chart(filtered_group_crime).mark_bar().encode(y='STREET:N', x='STREET_COUNT:Q', color=alt.condition(selection, 'OFFENSE_CODE_GROUP', alt.value('lightgrey')), tooltip=['STREET', 'STREET_COUNT', 'OFFENSE_CODE_GROUP', 'COUNT']).add_selection(selection)


In [100]:
# Write the five-offense chart (Washington St still included) to an HTML file.
bar_w_outliers.save("outlier_streetwise_totals.html")

In [101]:
# Drop every row for Washington Street, the outlier street,
# so the remaining streets are easier to compare.
filtered_group_crime_no_outliers = filtered_group_crime.loc[
    lambda rows: rows["STREET"] != "WASHINGTON ST"
]

filtered_group_crime_no_outliers

Unnamed: 0,index,STREET,OFFENSE_CODE_GROUP,COUNT,STREET_COUNT
0,283,ADAMS ST,Aggravated Assault,35,2042
12,295,ADAMS ST,Evading Fare,2,2042
19,302,ADAMS ST,Homicide,1,2042
23,306,ADAMS ST,Larceny,85,2042
45,328,ADAMS ST,Robbery,31,2042
...,...,...,...,...,...
2700,51362,WILLIAM T MORRISSEY BLVD,Larceny,153,774
2718,51380,WILLIAM T MORRISSEY BLVD,Robbery,10,774
2725,51759,WINTER ST,Aggravated Assault,23,725
2739,51773,WINTER ST,Larceny,212,725


In [102]:
# Legend-bound selection: clicking an offense group in the legend keeps its colour
# and turns every other segment light grey.
# NOTE(review): selection_single / add_selection are deprecated in Altair 5
# (selection_point / add_params replace them); kept so the cell still runs on Altair 4.
selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")

# Same chart as the five-offense plot, but built from the frame without Washington St.
# x uses the per-offense COUNT: bar marks with a colour field are stacked, so plotting
# STREET_COUNT (which repeats the street total on every row) would add the total once
# per offense group and inflate each bar. STREET_COUNT stays available in the tooltip.
bar_w_o_outliers = (
    alt.Chart(filtered_group_crime_no_outliers)
    .mark_bar()
    .encode(
        y="STREET:N",
        x="COUNT:Q",
        color=alt.condition(selection, "OFFENSE_CODE_GROUP", alt.value("lightgrey")),
        tooltip=["STREET", "STREET_COUNT", "OFFENSE_CODE_GROUP", "COUNT"],
    )
    .add_selection(selection)
    .properties(
        title="Bar Plot of 5 Major Crime Types on a Street-by-Street Level without Washington Street Outlier"
    )
)

  selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")
  bar_w_o_outliers = alt.Chart(filtered_group_crime_no_outliers).mark_bar().encode(y='STREET:N', x='STREET_COUNT:Q', color=alt.condition(selection, 'OFFENSE_CODE_GROUP', alt.value('lightgrey')), tooltip=['STREET', 'STREET_COUNT', 'OFFENSE_CODE_GROUP', 'COUNT']).add_selection(selection)


In [103]:
# Write the five-offense chart without Washington St to an HTML file.
bar_w_o_outliers.save("no_outlier_streetwise_totals.html")

In [104]:
# Data for the all-offenses chart: apply only the street-size filter
# (STREET_COUNT of at least 700) and keep every offense group.
# reset_index() keeps the original group_crime row labels in an "index" column.
filtered_group_crime_all_offenses = (
    group_crime[group_crime["STREET_COUNT"] >= 700]
    .reset_index()
)

filtered_group_crime_all_offenses

Unnamed: 0,index,STREET,OFFENSE_CODE_GROUP,COUNT,STREET_COUNT
0,283,ADAMS ST,Aggravated Assault,35,2042
1,284,ADAMS ST,Assembly or Gathering Violations,2,2042
2,285,ADAMS ST,Auto Theft,33,2042
3,286,ADAMS ST,Auto Theft Recovery,10,2042
4,287,ADAMS ST,Ballistics,11,2042
...,...,...,...,...,...
2758,51792,WINTER ST,Towed,3,725
2759,51793,WINTER ST,Vandalism,14,725
2760,51794,WINTER ST,Verbal Disputes,13,725
2761,51795,WINTER ST,Violations,5,725


In [105]:
# Legend-bound selection: clicking an offense group in the legend keeps its colour
# and turns every other segment light grey.
# NOTE(review): selection_single / add_selection are deprecated in Altair 5
# (selection_point / add_params replace them); kept so the cell still runs on Altair 4.
selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")

# One horizontal bar per street, one coloured segment per offense group (all groups kept).
# x uses the per-offense COUNT: bar marks with a colour field are stacked, so the
# segments add up to the street total. Plotting STREET_COUNT itself would add that
# total once per offense group and inflate each bar.
all_offense_bar = (
    alt.Chart(filtered_group_crime_all_offenses)
    .mark_bar()
    .encode(
        y="STREET:N",
        x="COUNT:Q",
        color=alt.condition(selection, "OFFENSE_CODE_GROUP", alt.value("lightgrey")),
        tooltip=["STREET", "STREET_COUNT", "OFFENSE_CODE_GROUP", "COUNT"],
    )
    .add_selection(selection)
    .properties(title="Bar Plot of All Crime Types on a Street-by-Street Level")
)

  selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")
  all_offense_bar = alt.Chart(filtered_group_crime_all_offenses).mark_bar().encode(y='STREET:N', x='STREET_COUNT:Q', color=alt.condition(selection, 'OFFENSE_CODE_GROUP', alt.value('lightgrey')), tooltip=['STREET', 'STREET_COUNT', 'OFFENSE_CODE_GROUP', 'COUNT']).add_selection(selection)


In [106]:
# Write the all-offenses chart to an HTML file.
all_offense_bar.save("all_offense_bar.html")