In [201]:
# importing altair and pandas for use in visualizations.
import altair as alt
import pandas as pd

# loading the crime csv from the zip file and displaying it's head
# this fails if the macosx folder remains in the zip
crime = pd.read_csv("crime.csv.zip", encoding="ISO-8859-1", compression='zip')
crime.head()

Unnamed: 0,INCIDENT_NUMBER,OFFENSE_CODE,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,DISTRICT,REPORTING_AREA,SHOOTING,OCCURRED_ON_DATE,YEAR,MONTH,DAY_OF_WEEK,HOUR,UCR_PART,STREET,Lat,Long,Location
0,I182070945,619,Larceny,LARCENY ALL OTHERS,D14,808,,2018-09-02 13:00:00,2018,9,Sunday,13,Part One,LINCOLN ST,42.357791,-71.139371,"(42.35779134, -71.13937053)"
1,I182070943,1402,Vandalism,VANDALISM,C11,347,,2018-08-21 00:00:00,2018,8,Tuesday,0,Part Two,HECLA ST,42.306821,-71.0603,"(42.30682138, -71.06030035)"
2,I182070941,3410,Towed,TOWED MOTOR VEHICLE,D4,151,,2018-09-03 19:27:00,2018,9,Monday,19,Part Three,CAZENOVE ST,42.346589,-71.072429,"(42.34658879, -71.07242943)"
3,I182070940,3114,Investigate Property,INVESTIGATE PROPERTY,D4,272,,2018-09-03 21:16:00,2018,9,Monday,21,Part Three,NEWCOMB ST,42.334182,-71.078664,"(42.33418175, -71.07866441)"
4,I182070938,3114,Investigate Property,INVESTIGATE PROPERTY,B3,421,,2018-09-03 21:05:00,2018,9,Monday,21,Part Three,DELHI ST,42.275365,-71.090361,"(42.27536542, -71.09036101)"


In [202]:
# generating the pre-filter set of street to offense code group to street count dataframes
# each street counts the number of specific offenses on that street alongside the total count for the street
group_crime = crime.groupby(["STREET", "OFFENSE_CODE_GROUP"]).size().reset_index(name="COUNT")
group_crime["STREET_COUNT"] = group_crime.groupby(["STREET"])["COUNT"].transform("sum")
group_crime.head()

Unnamed: 0,STREET,OFFENSE_CODE_GROUP,COUNT,STREET_COUNT
0,ALBANY ST,Aggravated Assault,2,42
1,ALBANY ST,Drug Violation,9,42
2,ALBANY ST,Evading Fare,1,42
3,ALBANY ST,Investigate Person,2,42
4,ALBANY ST,Larceny,3,42


In [203]:
#filtering the dataset to allow altair to visualize the data
#selecting streets with a number of crimes greater than 700 and condensing the data down to only Agg. Assault, Homicide, Robbery/Larceny, and Evading Fares

filtered_group_crime = group_crime.loc[group_crime["STREET_COUNT"] >= 700].reset_index()

filtered_group_crime = filtered_group_crime[(filtered_group_crime["OFFENSE_CODE_GROUP"] == 'Aggravated Assault') 
                                            | (filtered_group_crime["OFFENSE_CODE_GROUP"] == 'Homicide') 
                                            | (filtered_group_crime["OFFENSE_CODE_GROUP"] == 'Robbery')
                                            | (filtered_group_crime["OFFENSE_CODE_GROUP"] == 'Larceny') 
                                            | (filtered_group_crime["OFFENSE_CODE_GROUP"] == 'Evading Fare')]

filtered_group_crime = filtered_group_crime.sort_values("STREET_COUNT", ascending=False)

# filtering only relevant data from the 20 streets with the most crime

#sort
filtered_group_crime_temp = filtered_group_crime.sort_values("STREET_COUNT", ascending=False).drop_duplicates("STREET_COUNT")

#select 20 largest streets
filtered_group_crime_temp = filtered_group_crime_temp.nlargest(20, "STREET_COUNT")

#get mask for 20 largest streets
filtered_group_crime_temp2 = filtered_group_crime_temp["STREET_COUNT"].isin(filtered_group_crime_temp["STREET_COUNT"])

#select 20 largest streets
filtered_group_crime_top_10_select = filtered_group_crime_temp.loc[filtered_group_crime_temp2 == True]

#get mask for 20 largest streets
filtered_group_crime_temp3 = filtered_group_crime["STREET"].isin(filtered_group_crime_top_10_select["STREET"])

#get all crimes on 20 largest streets
filtered_group_crime_top_10 = filtered_group_crime.loc[filtered_group_crime_temp3 == True]

filtered_group_crime_top_10.head()


Unnamed: 0,index,STREET,OFFENSE_CODE_GROUP,COUNT,STREET_COUNT
2670,49768,WASHINGTON ST,Robbery,230,14194
2647,49745,WASHINGTON ST,Larceny,1933,14194
2642,49740,WASHINGTON ST,Homicide,4,14194
2634,49732,WASHINGTON ST,Evading Fare,17,14194
2620,49718,WASHINGTON ST,Aggravated Assault,370,14194


In [204]:
#disabling max row restrictions to allow visualization

alt.data_transformers.disable_max_rows()

# adding a selection for crime type
selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")

bar_w_outliers = alt.Chart(filtered_group_crime_top_10).mark_bar().encode(y='STREET:N', x='COUNT:Q', color=alt.condition(selection, 'OFFENSE_CODE_GROUP', alt.value('lightgrey')), tooltip=['STREET', 'STREET_COUNT', 'OFFENSE_CODE_GROUP', 'COUNT']).add_selection(selection).properties(title="Bar Plot of 5 Major Crime Types on a Street-by-Street Level")

# bar_w_outliers
#bar.save("temp.html")

  selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")
  bar_w_outliers = alt.Chart(filtered_group_crime_top_10).mark_bar().encode(y='STREET:N', x='COUNT:Q', color=alt.condition(selection, 'OFFENSE_CODE_GROUP', alt.value('lightgrey')), tooltip=['STREET', 'STREET_COUNT', 'OFFENSE_CODE_GROUP', 'COUNT']).add_selection(selection).properties(title="Bar Plot of 5 Major Crime Types on a Street-by-Street Level")


In [205]:
bar_w_outliers.save("outlier_streetwise_totals.html")

In [206]:
#removing the outlier street of washington street

filtered_group_crime_no_outliers = filtered_group_crime_top_10[filtered_group_crime_top_10["STREET"] != "WASHINGTON ST"]

filtered_group_crime_no_outliers.head()

Unnamed: 0,index,STREET,OFFENSE_CODE_GROUP,COUNT,STREET_COUNT
339,5175,BLUE HILL AVE,Aggravated Assault,279,7794
385,5221,BLUE HILL AVE,Robbery,200,7794
363,5199,BLUE HILL AVE,Larceny,536,7794
359,5195,BLUE HILL AVE,Homicide,3,7794
352,5188,BLUE HILL AVE,Evading Fare,8,7794


In [207]:
# adding a selection for offense group
selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")

bar_w_o_outliers = alt.Chart(filtered_group_crime_no_outliers).mark_bar().encode(y='STREET:N', x='COUNT:Q', color=alt.condition(selection, 'OFFENSE_CODE_GROUP', alt.value('lightgrey')), tooltip=['STREET', 'STREET_COUNT', 'OFFENSE_CODE_GROUP', 'COUNT']).add_selection(selection).properties(title="Bar Plot of 5 Major Crime Types on a Street-by-Street Level without Washington Street Outlier")

# bar_w_o_outliers

  selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")
  bar_w_o_outliers = alt.Chart(filtered_group_crime_no_outliers).mark_bar().encode(y='STREET:N', x='COUNT:Q', color=alt.condition(selection, 'OFFENSE_CODE_GROUP', alt.value('lightgrey')), tooltip=['STREET', 'STREET_COUNT', 'OFFENSE_CODE_GROUP', 'COUNT']).add_selection(selection).properties(title="Bar Plot of 5 Major Crime Types on a Street-by-Street Level without Washington Street Outlier")


In [208]:
bar_w_o_outliers.save("no_outlier_streetwise_totals.html")

In [209]:
#a visualization of all offenses with filter

filtered_group_crime_all_offenses = group_crime.loc[group_crime["STREET_COUNT"] >= 700].reset_index()

filtered_group_crime_all_offenses = group_crime.loc[group_crime["COUNT"] >= 50].reset_index()

filtered_group_crime = filtered_group_crime_all_offenses.sort_values("STREET_COUNT", ascending=False)

# filtering only relevant data from the 20 streets with the most crime

#sort
filtered_group_crime_temp = filtered_group_crime.sort_values("STREET_COUNT", ascending=False).drop_duplicates("STREET_COUNT")

#select 20 largest streets
filtered_group_crime_temp = filtered_group_crime_temp.nlargest(20, "STREET_COUNT")

#get mask for 20 largest streets
filtered_group_crime_temp2 = filtered_group_crime_temp["STREET_COUNT"].isin(filtered_group_crime_temp["STREET_COUNT"])

#select 20 largest streets
filtered_group_crime_top_10_select = filtered_group_crime_temp.loc[filtered_group_crime_temp2 == True]

#get mask for 20 largest streets
filtered_group_crime_temp3 = filtered_group_crime["STREET"].isin(filtered_group_crime_top_10_select["STREET"])

#get all crimes on 20 largest streets
filtered_group_crime_top_10 = filtered_group_crime.loc[filtered_group_crime_temp3 == True]

filtered_group_crime_top_10.head()

# filtered_group_crime_all_offenses

Unnamed: 0,index,STREET,OFFENSE_CODE_GROUP,COUNT,STREET_COUNT
718,49761,WASHINGTON ST,Property Found,242,14194
701,49730,WASHINGTON ST,Drug Violation,1222,14194
709,49746,WASHINGTON ST,Larceny From Motor Vehicle,278,14194
708,49745,WASHINGTON ST,Larceny,1933,14194
707,49743,WASHINGTON ST,Investigate Property,442,14194


In [210]:
# adding a selection for offense group
selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")

#adding slider bar to allow visualization of smaller or larger crimes

slider = alt.binding_range(min=0, max=2500, step=10)
cutoff = alt.param(bind=slider, value=0)
predicate = alt.datum.COUNT < cutoff

all_offense_bar = alt.Chart(filtered_group_crime_top_10).mark_bar().encode(y='STREET:N', x='COUNT:Q', color=alt.condition(selection, 'OFFENSE_CODE_GROUP', alt.value('lightgrey')), opacity = alt.when(predicate).then(alt.value(1)).otherwise(alt.value(0.1)), tooltip=['STREET', 'STREET_COUNT', 'OFFENSE_CODE_GROUP', 'COUNT']).add_selection(selection, cutoff).properties(title="Bar Plot of All Crime Types on a Street-by-Street Level")

# all_offense_bar

  selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")
  all_offense_bar = alt.Chart(filtered_group_crime_top_10).mark_bar().encode(y='STREET:N', x='COUNT:Q', color=alt.condition(selection, 'OFFENSE_CODE_GROUP', alt.value('lightgrey')), opacity = alt.when(predicate).then(alt.value(1)).otherwise(alt.value(0.1)), tooltip=['STREET', 'STREET_COUNT', 'OFFENSE_CODE_GROUP', 'COUNT']).add_selection(selection, cutoff).properties(title="Bar Plot of All Crime Types on a Street-by-Street Level")


In [211]:
all_offense_bar.save("all_offense_bar.html")