In [343]:
# importing altair and pandas for use in visualizations.
import altair as alt
import pandas as pd

# loading the crime csv from the zip file and displaying it's head
# this fails if the macosx folder remains in the zip
crime = pd.read_csv("crime.csv.zip", encoding="ISO-8859-1", compression='zip')
crime.head()

Unnamed: 0,INCIDENT_NUMBER,OFFENSE_CODE,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,DISTRICT,REPORTING_AREA,SHOOTING,OCCURRED_ON_DATE,YEAR,MONTH,DAY_OF_WEEK,HOUR,UCR_PART,STREET,Lat,Long,Location
0,I182070945,619,Larceny,LARCENY ALL OTHERS,D14,808,,2018-09-02 13:00:00,2018,9,Sunday,13,Part One,LINCOLN ST,42.357791,-71.139371,"(42.35779134, -71.13937053)"
1,I182070943,1402,Vandalism,VANDALISM,C11,347,,2018-08-21 00:00:00,2018,8,Tuesday,0,Part Two,HECLA ST,42.306821,-71.0603,"(42.30682138, -71.06030035)"
2,I182070941,3410,Towed,TOWED MOTOR VEHICLE,D4,151,,2018-09-03 19:27:00,2018,9,Monday,19,Part Three,CAZENOVE ST,42.346589,-71.072429,"(42.34658879, -71.07242943)"
3,I182070940,3114,Investigate Property,INVESTIGATE PROPERTY,D4,272,,2018-09-03 21:16:00,2018,9,Monday,21,Part Three,NEWCOMB ST,42.334182,-71.078664,"(42.33418175, -71.07866441)"
4,I182070938,3114,Investigate Property,INVESTIGATE PROPERTY,B3,421,,2018-09-03 21:05:00,2018,9,Monday,21,Part Three,DELHI ST,42.275365,-71.090361,"(42.27536542, -71.09036101)"


In [344]:
# merging related crimes to minimize chart junk

merge_dict = {
    'Larceny' : "Larceny/Other Theft",
    "Robbery" : "Larceny/Other Theft",
    "Auto Theft" : "Larceny/Other Theft",
    "Larceny From Motor Vehicle" : "Larceny/Other Theft",
    "Other Burglary"  : "Larceny/Other Theft",
    "Burglary - No Property Taken"  : "Larceny/Other Theft",
    "Auto Theft Recovery"  : "Larceny/Other Theft",
    "Commercial Burglary" : "Larceny/Other Theft",
    "Residential Burglary" : "Larceny/Other Theft",
    "Recovered Stolen Property" : "Larceny/Other Theft",
    "Simple Assault" : "Assaults/Violent Crime",
    "Aggravated Assault" : "Assaults/Violent Crime",
    "Ballistics" : "Assaults/Violent Crime",
    "Arson" : "Assaults/Violent Crime",
    "Missing Person Reported" : "Missing Persons",
    "Missing Person Located" : "Missing Persons",
    "Property Lost" : "Missing Property",
    "Property Found" : "Missing Property",
    "HOME INVASION" : "Assaults/Violent Crime",
    "Restraining Order Violations" : "Court/Legal Order Violations",
    "Liquor Violation" : "Court/Legal Order Violations",
    "Drug Violation" : "Court/Legal Order Violations",
    "Firearm Violations" : "Court/Legal Order Violations",
    "Violations" : "Court/Legal Order Violations",
    "Assembly or Gathering Violations" : "Court/Legal Order Violations",
    "License Violation" : "Court/Legal Order Violations",
    "HUMAN TRAFFICKING" : "Human Trafficking",
    'HUMAN TRAFFICKING - INVOLUNTARY SERVITUDE' : "Human Trafficking",
    "INVESTIGATE PERSON" : "Investigate Person",
    "Bomb Hoax" : "Other Crimes",
    "Operating Under the Influence" : "Other Crimes",
    "Aircraft" : "Other Crimes",
    "Counterfeiting" : "Other Crimes",
    "Embezzlement" : "Other Crimes",
    "Gambling" : "Other Crimes",
    "Biological Threat" : "Other Crimes",
    "Disorderly Conduct" : "Other Crimes",
    "Other" : "Other Crimes",
    "Prostitution" : "Other Crimes",
    "Harassment" : "Other Crimes",
    "Verbal Disputes" : "Other Crimes",
    "Vandalism" : "Other Crimes",
    "Explosives" : "Other Crimes",
    "Fraud" : "Other Crimes",
    "Confidence Games" : "Other Crimes",
    "Fire Related Reports" : "Other Crimes"
}

crime.OFFENSE_CODE_GROUP.replace(to_replace=merge_dict, inplace=True)

crime["OFFENSE_CODE_GROUP"].unique()

#crime.head()

array(['Larceny/Other Theft', 'Other Crimes', 'Towed',
       'Investigate Property', 'Motor Vehicle Accident Response',
       'Missing Property', 'Medical Assistance',
       'Court/Legal Order Violations', 'Assaults/Violent Crime',
       'Police Service Incidents', 'Warrant Arrests',
       'Property Related Damage', 'Missing Persons', 'Investigate Person',
       'License Plate Related Incidents', 'Harbor Related Incidents',
       'Firearm Discovery', 'Landlord/Tenant Disputes', 'Service',
       'Search Warrants', 'Offenses Against Child / Family',
       'Evading Fare', 'Prisoner Related Incidents', 'Homicide',
       'Criminal Harassment', 'Phone Call Complaints', 'Manslaughter',
       'Human Trafficking'], dtype=object)

In [345]:
# generating the pre-filter set of street to offense code group to street count dataframes
# each street counts the number of specific offenses on that street alongside the total count for the street
group_crime = crime.groupby(["STREET", "OFFENSE_CODE_GROUP"]).size().reset_index(name="COUNT")
group_crime["STREET_COUNT"] = group_crime.groupby(["STREET"])["COUNT"].transform("sum")
group_crime.head()

Unnamed: 0,STREET,OFFENSE_CODE_GROUP,COUNT,STREET_COUNT
0,ALBANY ST,Assaults/Violent Crime,3,42
1,ALBANY ST,Court/Legal Order Violations,13,42
2,ALBANY ST,Evading Fare,1,42
3,ALBANY ST,Investigate Person,2,42
4,ALBANY ST,Larceny/Other Theft,7,42


In [346]:
#filtering the dataset to allow altair to visualize the data
#selecting streets with a number of crimes greater than 700 and condensing the data down to only Agg. Assault, Homicide, Robbery/Larceny, and Evading Fares

filtered_group_crime = group_crime.loc[group_crime["STREET_COUNT"] >= 700].reset_index()

filtered_group_crime = filtered_group_crime[(filtered_group_crime["OFFENSE_CODE_GROUP"] == 'Larceny/Other Theft') 
                                            | (filtered_group_crime["OFFENSE_CODE_GROUP"] == 'Homicide') 
                                            | (filtered_group_crime["OFFENSE_CODE_GROUP"] == 'Assaults/Violent Crime')
                                            | (filtered_group_crime["OFFENSE_CODE_GROUP"] == 'Court/Legal Order Violations') 
                                            | (filtered_group_crime["OFFENSE_CODE_GROUP"] == 'Motor Vehicle Accident Response')]

filtered_group_crime = filtered_group_crime.sort_values("STREET_COUNT", ascending=False)

# filtering only relevant data from the 20 streets with the most crime

#sort
filtered_group_crime_temp = filtered_group_crime.sort_values("STREET_COUNT", ascending=False).drop_duplicates("STREET_COUNT")

#select 20 largest streets
filtered_group_crime_temp = filtered_group_crime_temp.nlargest(20, "STREET_COUNT")

#get mask for 20 largest streets
filtered_group_crime_temp2 = filtered_group_crime_temp["STREET_COUNT"].isin(filtered_group_crime_temp["STREET_COUNT"])

#select 20 largest streets
filtered_group_crime_top_10_select = filtered_group_crime_temp.loc[filtered_group_crime_temp2 == True]

#get mask for 20 largest streets
filtered_group_crime_temp3 = filtered_group_crime["STREET"].isin(filtered_group_crime_top_10_select["STREET"])

#get all crimes on 20 largest streets
filtered_group_crime_top_10 = filtered_group_crime.loc[filtered_group_crime_temp3 == True]

filtered_group_crime_top_10.head()


Unnamed: 0,index,STREET,OFFENSE_CODE_GROUP,COUNT,STREET_COUNT
1156,28990,WASHINGTON ST,Motor Vehicle Accident Response,1130,14194
1150,28984,WASHINGTON ST,Larceny/Other Theft,2899,14194
1146,28980,WASHINGTON ST,Homicide,4,14194
1142,28976,WASHINGTON ST,Court/Legal Order Violations,1862,14194
1141,28975,WASHINGTON ST,Assaults/Violent Crime,1168,14194


In [347]:
#disabling max row restrictions to allow visualization

alt.data_transformers.disable_max_rows()

# adding a selection for crime type
selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")

bar_w_outliers = alt.Chart(filtered_group_crime_top_10).mark_bar().encode(y='STREET:N', x='COUNT:Q', color=alt.condition(selection, 'OFFENSE_CODE_GROUP', alt.value('lightgrey')), tooltip=['STREET', 'STREET_COUNT', 'OFFENSE_CODE_GROUP', 'COUNT']).add_selection(selection).properties(title="Bar Plot of 5 Major Crime Types on a Street-by-Street Level")

bar_w_outliers
#bar.save("temp.html")

  selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")
  bar_w_outliers = alt.Chart(filtered_group_crime_top_10).mark_bar().encode(y='STREET:N', x='COUNT:Q', color=alt.condition(selection, 'OFFENSE_CODE_GROUP', alt.value('lightgrey')), tooltip=['STREET', 'STREET_COUNT', 'OFFENSE_CODE_GROUP', 'COUNT']).add_selection(selection).properties(title="Bar Plot of 5 Major Crime Types on a Street-by-Street Level")


In [348]:
# bar_w_outliers.save("outlier_streetwise_totals.html")

In [349]:
#removing the outlier street of washington street

filtered_group_crime_no_outliers = filtered_group_crime_top_10[filtered_group_crime_top_10["STREET"] != "WASHINGTON ST"]

filtered_group_crime_no_outliers.head()

Unnamed: 0,index,STREET,OFFENSE_CODE_GROUP,COUNT,STREET_COUNT
149,3040,BLUE HILL AVE,Homicide,3,7794
144,3035,BLUE HILL AVE,Assaults/Violent Crime,660,7794
145,3036,BLUE HILL AVE,Court/Legal Order Violations,1281,7794
153,3044,BLUE HILL AVE,Larceny/Other Theft,1159,7794
158,3049,BLUE HILL AVE,Motor Vehicle Accident Response,1246,7794


In [350]:
# adding a selection for offense group
selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")

bar_w_o_outliers = alt.Chart(filtered_group_crime_no_outliers).mark_bar().encode(y='STREET:N', x='COUNT:Q', color=alt.condition(selection, 'OFFENSE_CODE_GROUP', alt.value('lightgrey')), tooltip=['STREET', 'STREET_COUNT', 'OFFENSE_CODE_GROUP', 'COUNT']).add_selection(selection).properties(title="Bar Plot of 5 Major Crime Types on a Street-by-Street Level without Washington Street Outlier")

bar_w_o_outliers

  selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")
  bar_w_o_outliers = alt.Chart(filtered_group_crime_no_outliers).mark_bar().encode(y='STREET:N', x='COUNT:Q', color=alt.condition(selection, 'OFFENSE_CODE_GROUP', alt.value('lightgrey')), tooltip=['STREET', 'STREET_COUNT', 'OFFENSE_CODE_GROUP', 'COUNT']).add_selection(selection).properties(title="Bar Plot of 5 Major Crime Types on a Street-by-Street Level without Washington Street Outlier")


In [351]:
# bar_w_o_outliers.save("no_outlier_streetwise_totals.html")

In [352]:
#a visualization of all offenses with filter

filtered_group_crime_all_offenses = group_crime.loc[group_crime["STREET_COUNT"] >= 700].reset_index()

filtered_group_crime_all_offenses = group_crime.loc[group_crime["COUNT"] >= 50].reset_index()

filtered_group_crime = filtered_group_crime_all_offenses.sort_values("STREET_COUNT", ascending=False)

# filtering only relevant data from the 20 streets with the most crime

#sort
filtered_group_crime_temp = filtered_group_crime.sort_values("STREET_COUNT", ascending=False).drop_duplicates("STREET_COUNT")

#select 20 largest streets
filtered_group_crime_temp = filtered_group_crime_temp.nlargest(20, "STREET_COUNT")

#get mask for 20 largest streets
filtered_group_crime_temp2 = filtered_group_crime_temp["STREET_COUNT"].isin(filtered_group_crime_temp["STREET_COUNT"])

#select 20 largest streets
filtered_group_crime_top_10_select = filtered_group_crime_temp.loc[filtered_group_crime_temp2 == True]

#get mask for 20 largest streets
filtered_group_crime_temp3 = filtered_group_crime["STREET"].isin(filtered_group_crime_top_10_select["STREET"])

#get all crimes on 20 largest streets
filtered_group_crime_top_10 = filtered_group_crime.loc[filtered_group_crime_temp3 == True]

filtered_group_crime_top_10.head()

# filtered_group_crime_all_offenses

Unnamed: 0,index,STREET,OFFENSE_CODE_GROUP,COUNT,STREET_COUNT
903,28989,WASHINGTON ST,Missing Property,795,14194
896,28975,WASHINGTON ST,Assaults/Violent Crime,1168,14194
898,28981,WASHINGTON ST,Investigate Person,811,14194
899,28982,WASHINGTON ST,Investigate Property,442,14194
900,28984,WASHINGTON ST,Larceny/Other Theft,2899,14194


In [357]:
# adding a selection for offense group
selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")

#adding slider bar to allow visualization of smaller or larger crimes

slider = alt.binding_range(min=0, max=3000, step=10)
cutoff = alt.param(bind=slider, value=3000, name="CrimeCountCutoffSlider")
predicate = alt.datum.COUNT < cutoff

all_offense_bar = alt.Chart(filtered_group_crime_top_10).mark_bar().encode(y='STREET:N', x='COUNT:Q', color=alt.condition(selection, alt.Color('OFFENSE_CODE_GROUP:N', legend= alt.Legend(title="Offense Code Group")), alt.value('lightgrey')), opacity = alt.when(predicate).then(alt.value(1)).otherwise(alt.value(0.1)), tooltip=['STREET', 'STREET_COUNT', 'OFFENSE_CODE_GROUP', 'COUNT']).add_selection(selection, cutoff).properties(width = 600, height = 400, title="Bar Plot of All Crime Types on a Street-by-Street Level")

all_offense_bar

  selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")
  all_offense_bar = alt.Chart(filtered_group_crime_top_10).mark_bar().encode(y='STREET:N', x='COUNT:Q', color=alt.condition(selection, alt.Color('OFFENSE_CODE_GROUP:N', legend= alt.Legend(title="Offense Code Group")), alt.value('lightgrey')), opacity = alt.when(predicate).then(alt.value(1)).otherwise(alt.value(0.1)), tooltip=['STREET', 'STREET_COUNT', 'OFFENSE_CODE_GROUP', 'COUNT']).add_selection(selection, cutoff).properties(width = 600, height = 400, title="Bar Plot of All Crime Types on a Street-by-Street Level")


In [354]:
all_offense_bar.save("all_offense_bar.html")