In [6]:
# importing altair and pandas for use in visualizations.
import altair as alt
import pandas as pd

# loading the crime csv from the zip file and displaying it's head
# this fails if the macosx folder remains in the zip
crime = pd.read_csv("crime.csv.zip", encoding="ISO-8859-1", compression='zip')
crime.head()

Unnamed: 0,INCIDENT_NUMBER,OFFENSE_CODE,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,DISTRICT,REPORTING_AREA,SHOOTING,OCCURRED_ON_DATE,YEAR,MONTH,DAY_OF_WEEK,HOUR,UCR_PART,STREET,Lat,Long,Location
0,I182070945,619,Larceny,LARCENY ALL OTHERS,D14,808,,2018-09-02 13:00:00,2018,9,Sunday,13,Part One,LINCOLN ST,42.357791,-71.139371,"(42.35779134, -71.13937053)"
1,I182070943,1402,Vandalism,VANDALISM,C11,347,,2018-08-21 00:00:00,2018,8,Tuesday,0,Part Two,HECLA ST,42.306821,-71.0603,"(42.30682138, -71.06030035)"
2,I182070941,3410,Towed,TOWED MOTOR VEHICLE,D4,151,,2018-09-03 19:27:00,2018,9,Monday,19,Part Three,CAZENOVE ST,42.346589,-71.072429,"(42.34658879, -71.07242943)"
3,I182070940,3114,Investigate Property,INVESTIGATE PROPERTY,D4,272,,2018-09-03 21:16:00,2018,9,Monday,21,Part Three,NEWCOMB ST,42.334182,-71.078664,"(42.33418175, -71.07866441)"
4,I182070938,3114,Investigate Property,INVESTIGATE PROPERTY,B3,421,,2018-09-03 21:05:00,2018,9,Monday,21,Part Three,DELHI ST,42.275365,-71.090361,"(42.27536542, -71.09036101)"


In [7]:
# adding a daily crime count column to the dataframe using .groupby and .transform

crime["day_count"] = crime.groupby("DAY_OF_WEEK")["DAY_OF_WEEK"].transform("size")
crime.head()

Unnamed: 0,INCIDENT_NUMBER,OFFENSE_CODE,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,DISTRICT,REPORTING_AREA,SHOOTING,OCCURRED_ON_DATE,YEAR,MONTH,DAY_OF_WEEK,HOUR,UCR_PART,STREET,Lat,Long,Location,day_count
0,I182070945,619,Larceny,LARCENY ALL OTHERS,D14,808,,2018-09-02 13:00:00,2018,9,Sunday,13,Part One,LINCOLN ST,42.357791,-71.139371,"(42.35779134, -71.13937053)",40313
1,I182070943,1402,Vandalism,VANDALISM,C11,347,,2018-08-21 00:00:00,2018,8,Tuesday,0,Part Two,HECLA ST,42.306821,-71.0603,"(42.30682138, -71.06030035)",46383
2,I182070941,3410,Towed,TOWED MOTOR VEHICLE,D4,151,,2018-09-03 19:27:00,2018,9,Monday,19,Part Three,CAZENOVE ST,42.346589,-71.072429,"(42.34658879, -71.07242943)",45679
3,I182070940,3114,Investigate Property,INVESTIGATE PROPERTY,D4,272,,2018-09-03 21:16:00,2018,9,Monday,21,Part Three,NEWCOMB ST,42.334182,-71.078664,"(42.33418175, -71.07866441)",45679
4,I182070938,3114,Investigate Property,INVESTIGATE PROPERTY,B3,421,,2018-09-03 21:05:00,2018,9,Monday,21,Part Three,DELHI ST,42.275365,-71.090361,"(42.27536542, -71.09036101)",45679


In [13]:
# adding a per offense code/weekday total to the dataframe
crime["crime_weekday_total"] = crime.groupby(["DAY_OF_WEEK", "OFFENSE_CODE_GROUP"])["OFFENSE_CODE_GROUP"].transform("size")
crime.head()

Unnamed: 0,INCIDENT_NUMBER,OFFENSE_CODE,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,DISTRICT,REPORTING_AREA,SHOOTING,OCCURRED_ON_DATE,YEAR,MONTH,DAY_OF_WEEK,HOUR,UCR_PART,STREET,Lat,Long,Location,day_count,crime_weekday_total
0,I182070945,619,Larceny,LARCENY ALL OTHERS,D14,808,,2018-09-02 13:00:00,2018,9,Sunday,13,Part One,LINCOLN ST,42.357791,-71.139371,"(42.35779134, -71.13937053)",40313,3080
1,I182070943,1402,Vandalism,VANDALISM,C11,347,,2018-08-21 00:00:00,2018,8,Tuesday,0,Part Two,HECLA ST,42.306821,-71.0603,"(42.30682138, -71.06030035)",46383,2071
2,I182070941,3410,Towed,TOWED MOTOR VEHICLE,D4,151,,2018-09-03 19:27:00,2018,9,Monday,19,Part Three,CAZENOVE ST,42.346589,-71.072429,"(42.34658879, -71.07242943)",45679,1701
3,I182070940,3114,Investigate Property,INVESTIGATE PROPERTY,D4,272,,2018-09-03 21:16:00,2018,9,Monday,21,Part Three,NEWCOMB ST,42.334182,-71.078664,"(42.33418175, -71.07866441)",45679,1539
4,I182070938,3114,Investigate Property,INVESTIGATE PROPERTY,B3,421,,2018-09-03 21:05:00,2018,9,Monday,21,Part Three,DELHI ST,42.275365,-71.090361,"(42.27536542, -71.09036101)",45679,1539


In [27]:
# sorting values and dropping colums with less than 300 offenses(eg. aircraft or homicide)
crime = crime.sort_values("crime_weekday_total", ascending=False)
crime = crime.drop(crime[crime["crime_weekday_total"] < 300].index)
crime.head()

Unnamed: 0,INCIDENT_NUMBER,OFFENSE_CODE,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,DISTRICT,REPORTING_AREA,SHOOTING,OCCURRED_ON_DATE,YEAR,MONTH,DAY_OF_WEEK,HOUR,UCR_PART,STREET,Lat,Long,Location,day_count,crime_weekday_total
34813,I182033514,3831,Motor Vehicle Accident Response,M/V - LEAVING SCENE - PROPERTY DAMAGE,B3,470.0,,2018-05-04 06:05:00,2018,5,Friday,6,Part Three,ITASCA ST,42.275609,-71.100318,"(42.27560924, -71.10031808)",48495,5852
174738,I162098218,3802,Motor Vehicle Accident Response,M/V ACCIDENT - PROPERTY DAMAGE,C11,345.0,,2016-12-02 09:22:00,2016,12,Friday,9,Part Three,ARCADIA ST,42.301402,-71.063058,"(42.30140245, -71.06305829)",48495,5852
191607,I162080164,3803,Motor Vehicle Accident Response,M/V ACCIDENT - PERSONAL INJURY,B2,,,2016-09-30 17:39:00,2016,9,Friday,17,Part Three,MASSACHUSETTS AVE,,,"(0.00000000, 0.00000000)",48495,5852
145666,I172022891,3803,Motor Vehicle Accident Response,M/V ACCIDENT - PERSONAL INJURY,C6,,,2017-03-24 05:45:00,2017,3,Friday,5,Part Three,,,,"(0.00000000, 0.00000000)",48495,5852
205646,I162064985,3831,Motor Vehicle Accident Response,M/V - LEAVING SCENE - PROPERTY DAMAGE,E13,579.0,,2016-08-12 17:20:00,2016,8,Friday,17,Part Three,CENTRE ST,42.32277,-71.104455,"(42.32277007, -71.10445458)",48495,5852


In [28]:
# dropping duplicate weekday totals to collate only the unique values for representation
daily_totals = crime.drop_duplicates("crime_weekday_total")
daily_totals

Unnamed: 0,INCIDENT_NUMBER,OFFENSE_CODE,OFFENSE_CODE_GROUP,OFFENSE_DESCRIPTION,DISTRICT,REPORTING_AREA,SHOOTING,OCCURRED_ON_DATE,YEAR,MONTH,DAY_OF_WEEK,HOUR,UCR_PART,STREET,Lat,Long,Location,day_count,crime_weekday_total
34813,I182033514,3831,Motor Vehicle Accident Response,M/V - LEAVING SCENE - PROPERTY DAMAGE,B3,470,,2018-05-04 06:05:00,2018,5,Friday,6,Part Three,ITASCA ST,42.275609,-71.100318,"(42.27560924, -71.10031808)",48495,5852
183598,I162088675,3803,Motor Vehicle Accident Response,M/V ACCIDENT - PERSONAL INJURY,C11,365,,2016-10-29 21:10:00,2016,10,Saturday,21,Part Three,LONSDALE ST,42.291115,-71.058436,"(42.29111533, -71.05843629)",44818,5490
269734,I152103802,3820,Motor Vehicle Accident Response,M/V ACCIDENT INVOLVING PEDESTRIAN - INJURY,E5,733,,2015-12-16 17:09:00,2015,12,Wednesday,17,Part Three,LAGRANGE ST,42.287454,-71.165322,"(42.28745399, -71.16532231)",46729,5251
283035,I152089019,3801,Motor Vehicle Accident Response,M/V ACCIDENT - OTHER,B2,587,,2015-10-27 08:01:00,2015,10,Tuesday,8,Part Three,TREMONT ST,42.332590,-71.100314,"(42.33258958, -71.10031377)",46383,5210
305123,I152064957,3802,Motor Vehicle Accident Response,M/V ACCIDENT - PROPERTY DAMAGE,C11,348,,2015-08-06 12:21:00,2015,8,Thursday,12,Part Three,DORCHESTER AVE,42.304704,-71.059083,"(42.30470399, -71.05908258)",46656,5187
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
289411,I152082116,3304,Assembly or Gathering Violations,NOISY PARTY/RADIO-NO ARREST,B2,586,,2015-10-03 00:52:00,2015,10,Saturday,0,Part Three,CHEROKEE ST,42.330477,-71.100727,"(42.33047722, -71.10072736)",44818,322
47886,I182019645,1503,Firearm Violations,"WEAPON - OTHER - CARRYING / POSSESSING, ETC",B3,442,,2018-03-15 19:07:00,2018,3,Thursday,19,Part Two,WESTCOTT ST,42.293218,-71.078865,"(42.29321805, -71.07886455)",46656,316
245956,I162021734,3111,License Violation,LICENSE PREMISE VIOLATION,C6,222,,2016-03-20 15:44:00,2016,3,Sunday,15,Part Three,E SECOND ST,42.337307,-71.036470,"(42.33730678, -71.03646955)",40313,307
263429,I162002851,1106,Confidence Games,FRAUD - CREDIT CARD / ATM FRAUD,C6,200,,2015-12-06 00:00:00,2015,12,Sunday,0,Part Two,W BROADWAY,42.342342,-71.056333,"(42.34234193, -71.05633276)",40313,306


In [30]:
# adding a selection for offense type
selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")

# disabling max row restriction to allow full dataset use
alt.data_transformers.disable_max_rows()

# constructing the chart - encoding the day of week on the x and daily total count on the y, with color being affected by offense code group & selection
# tooltip allows for exact crime numbers to be displayed
bar = alt.Chart(daily_totals).mark_bar().encode(x= "DAY_OF_WEEK", y= "crime_weekday_total", color=alt.condition(selection, "OFFENSE_CODE_GROUP:N", alt.value('lightgrey')), tooltip=['DAY_OF_WEEK', 'day_count', "OFFENSE_CODE_GROUP", "crime_weekday_total"]).add_selection(selection).properties(title="Daily Crime Totals for Each Weekday")

bar

  selection = alt.selection_single(fields=["OFFENSE_CODE_GROUP"], bind="legend")
  bar = alt.Chart(daily_totals).mark_bar().encode(x= "DAY_OF_WEEK", y= "crime_weekday_total", color=alt.condition(selection, "OFFENSE_CODE_GROUP:N", alt.value('lightgrey')), tooltip=['DAY_OF_WEEK', 'day_count', "OFFENSE_CODE_GROUP", "crime_weekday_total"]).add_selection(selection).properties(title="Daily Crime Totals for Each Weekday")


In [9]:
bar.save("weekly_bar_chart.html")