In [18]:
import pandas as pd
import altair as alt
import numpy as np
data = pd.read_csv("denver-1.nov.csv")
alt.data_transformers.disable_max_rows()

DataTransformerRegistry.enable('default')

In [2]:
alt.Chart(data).mark_bar().encode(
    x=alt.X("precinct", sort="-y"),
    y=alt.Y("count()"),
    color=alt.Color('outcome', scale=alt.Scale(scheme='tableau10')),
    tooltip=["count()","arrest_made"])

In [3]:
data

Unnamed: 0,raw_row_number,date,time,location,lat,lng,district,precinct,type,disposition,arrest_made,citation_issued,warning_issued,outcome
0,1,2012-11-13,00:02:22,8500 W CRESTLINE AVE,39.618329,-105.092691,4,423,vehicular,K - Street Check Completed,False,False,False,
1,16,2012-11-29,00:25:01,8500 W CRESTLINE AVE,39.618329,-105.092691,4,423,vehicular,Party Advised,False,False,False,
2,72,2012-11-23,22:58:10,4830 S HOLLAND WAY,39.630666,-105.102551,4,423,vehicular,K - Street Check Completed,False,False,False,
3,121,2012-11-27,17:48:33,4885 S QUEBEC ST,39.626174,-104.904062,3,324,pedestrian,In Service,False,False,False,
4,138,2012-11-14,23:46:58,W LAYTON AVE / S WADSWORTH BLVD,39.630150,-105.081693,4,423,vehicular,Party Advised,False,False,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6360,672025,2012-11-06,00:03:34,E 56TH AVE / N TOWER RD,39.798303,-104.772121,5,522,vehicular,T - Citation Issued,False,True,False,citation
6361,672116,2012-11-09,07:33:48,5300-BLK N PEORIA ST,39.792865,-104.847131,5,511,vehicular,Warning Issued,False,False,True,warning
6362,672283,2012-11-28,15:57:42,E RANDOLPH PL / N WORCHESTER ST,39.797568,-104.830744,5,521,vehicular,Party Advised,False,False,False,
6363,672463,2012-11-29,20:37:09,E 55TH AVE / E MAXWELL PL,39.794786,-104.749367,5,523,vehicular,Warning Issued,False,False,True,warning


In [4]:
# in class, we looked at the probability that the Denver police made an arrest after a vehicular stop 
# vs. a pedestrian stop.

# I need to aggregate the data and compute some statistic - will use pandas + numpy for this
# Since I've already started looking at this, I'll filter by the outcome
# maybe there is a tendency for arrests to be made at a certain time in comparison to citations or warnings

In [5]:
alt.Chart(data).mark_bar().encode(
    x=alt.X("district", sort="x"),
    y=alt.Y("count()"),
    color=alt.Color('outcome', scale=alt.Scale(scheme='tableau10')),
    tooltip=["count()","outcome"])
# here, there is quite a high number of NaN (or non-existent entries), which means that there is not data available.
# that's unfortunate, and it may indicate that I should reconsider my analysis of the outcome

In [1]:
# in class we looked at the probability of an arrest within the types of stops (pedestrian/vehicular)

# now, let's look at the probabilty of each outcome within each district
# generally, we shouldn't expect one district to be arresting much more often (rate-wise) than another,
# because we don't expect an inherent difference in the people within each district

# so...
# 1. filtering by district
# 2. aggregating by outcome

In [4]:
data

Unnamed: 0,raw_row_number,date,time,location,lat,lng,district,precinct,type,disposition,arrest_made,citation_issued,warning_issued,outcome
0,1,2012-11-13,00:02:22,8500 W CRESTLINE AVE,39.618329,-105.092691,4,423,vehicular,K - Street Check Completed,False,False,False,
1,16,2012-11-29,00:25:01,8500 W CRESTLINE AVE,39.618329,-105.092691,4,423,vehicular,Party Advised,False,False,False,
2,72,2012-11-23,22:58:10,4830 S HOLLAND WAY,39.630666,-105.102551,4,423,vehicular,K - Street Check Completed,False,False,False,
3,121,2012-11-27,17:48:33,4885 S QUEBEC ST,39.626174,-104.904062,3,324,pedestrian,In Service,False,False,False,
4,138,2012-11-14,23:46:58,W LAYTON AVE / S WADSWORTH BLVD,39.630150,-105.081693,4,423,vehicular,Party Advised,False,False,False,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
6360,672025,2012-11-06,00:03:34,E 56TH AVE / N TOWER RD,39.798303,-104.772121,5,522,vehicular,T - Citation Issued,False,True,False,citation
6361,672116,2012-11-09,07:33:48,5300-BLK N PEORIA ST,39.792865,-104.847131,5,511,vehicular,Warning Issued,False,False,True,warning
6362,672283,2012-11-28,15:57:42,E RANDOLPH PL / N WORCHESTER ST,39.797568,-104.830744,5,521,vehicular,Party Advised,False,False,False,
6363,672463,2012-11-29,20:37:09,E 55TH AVE / E MAXWELL PL,39.794786,-104.749367,5,523,vehicular,Warning Issued,False,False,True,warning


In [5]:
data1 = data.dropna()

In [20]:
2777 / 6365

0.43629222309505106

In [7]:
alt.Chart(data1).mark_bar().encode(
    x=alt.X("precinct", sort="-y"),
    y=alt.Y("count()"),
    color=alt.Color('outcome', scale=alt.Scale(scheme='tableau10')),
    tooltip=["count()","outcome"])

In [12]:
data1 = data1[["date","district","precinct","type","outcome"]]
data1

Unnamed: 0,date,district,precinct,type,outcome
22,2012-11-01,4,423,vehicular,warning
24,2012-11-01,4,423,vehicular,warning
25,2012-11-01,4,423,vehicular,citation
26,2012-11-06,4,423,vehicular,arrest
30,2012-11-03,4,423,pedestrian,arrest
...,...,...,...,...,...
6359,2012-11-05,5,522,vehicular,citation
6360,2012-11-06,5,522,vehicular,citation
6361,2012-11-09,5,511,vehicular,warning
6363,2012-11-29,5,523,vehicular,warning


In [19]:
data1.groupby("date").agg({"district":np.mean}).sort_values("district",ascending=False).head(20)

DataError: No numeric types to aggregate