# Py: Tidy data analysis - Police traffic activity 

We are going to make use of data from a open source project https://openpolicing.stanford.edu/ and we have collected data belongs to the state: Rhode Island for this analysis.

In [1]:
# Importing libraries
import datatable as dt
import pandas as pd
import altair as alt
from datatable import f,by,count,update,sort,join

In [2]:
# Datatable options are set to display limit number of rows and datatable frame columns colors are maintained
dt.init_styles()
dt.options.display.head_nrows=4
dt.options.display.tail_nrows=4

In [3]:
# Importing data of police activities
policia_dt = dt.fread('https://assets.datacamp.com/production/repositories/1497/datasets/62bd9feef451860db02d26553613a299721882e8/police.csv',na_strings=[""])

In [4]:
# Importing weather reports for the state RI
weather_dt = dt.fread('https://assets.datacamp.com/production/repositories/1497/datasets/02f3fb2d4416d3f6626e1117688e0386784e8e55/weather.csv',na_strings=[""])

In [5]:
# Glance
policia_dt

Unnamed: 0_level_0,state,stop_date,stop_time,county_name,driver_gender,driver_race,violation_raw,violation,search_conducted,search_type,stop_outcome,is_arrested,stop_duration,drugs_related_stop,district
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪,▪▪▪▪,▪▪▪▪,▪,▪▪▪▪,▪,▪▪▪▪
0,RI,2005-01-04,12:55,,M,White,Equipment/Inspection Violation,Equipment,0,,Citation,0,0-15 Min,0,Zone X4
1,RI,2005-01-23,23:15,,M,White,Speeding,Speeding,0,,Citation,0,0-15 Min,0,Zone K3
2,RI,2005-02-17,04:15,,M,White,Speeding,Speeding,0,,Citation,0,0-15 Min,0,Zone X4
3,RI,2005-02-20,17:15,,M,White,Call for Service,Other,0,,Arrest Driver,1,16-30 Min,0,Zone X1
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
91737,RI,2015-12-31,21:59,,F,White,Speeding,Speeding,0,,Citation,0,0-15 Min,0,Zone K3
91738,RI,2015-12-31,22:04,,M,White,Other Traffic Violation,Moving violation,0,,Citation,0,0-15 Min,0,Zone X3
91739,RI,2015-12-31,22:09,,F,Hispanic,Equipment/Inspection Violation,Equipment,0,,Warning,0,0-15 Min,0,Zone K3
91740,RI,2015-12-31,22:47,,M,White,Registration Violation,Registration/plates,0,,Citation,0,0-15 Min,0,Zone X4


In [6]:
# Glance
weather_dt

Unnamed: 0_level_0,STATION,DATE,TAVG,TMIN,TMAX,AWND,WSF2,WT01,WT02,WT03,…,WT17,WT18,WT19,WT21,WT22
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,Unnamed: 11_level_1,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,USW00014765,2005-01-01,44,35,53,8.95,25.1,1,,,…,,,,,
1,USW00014765,2005-01-02,36,28,44,9.4,14.1,,,,…,,1,,,
2,USW00014765,2005-01-03,49,44,53,6.93,17,1,,,…,,,,,
3,USW00014765,2005-01-04,42,39,45,6.93,16.1,1,,,…,,,,,
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋱,⋮,⋮,⋮,⋮,⋮
4013,USW00014765,2015-12-28,40,30,44,12.3,23,,,,…,,,,,
4014,USW00014765,2015-12-29,33,28,40,12.53,18.1,1,,,…,,,,,
4015,USW00014765,2015-12-30,30,27,35,6.93,15,1,,,…,,,,,
4016,USW00014765,2015-12-31,39,35,50,8.05,18.1,1,,,…,,,,,


In [7]:
weather_dt = weather_dt[:,([f.DATE,f.TAVG,f.TMIN,f.TMAX])]

In [8]:
weather_dt.names = {'DATE': "stop_date",
                    'TAVG': "temp_avg",
                    'TMIN': "temp_min",
                    'TMAX': "temp_max"}

In [9]:
weather_dt

Unnamed: 0_level_0,stop_date,temp_avg,temp_min,temp_max
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪
0,2005-01-01,44,35,53
1,2005-01-02,36,28,44
2,2005-01-03,49,44,53
3,2005-01-04,42,39,45
⋮,⋮,⋮,⋮,⋮
4013,2015-12-28,40,30,44
4014,2015-12-29,33,28,40
4015,2015-12-30,30,27,35
4016,2015-12-31,39,35,50


In [10]:
weather_dt.key="stop_date"

In [11]:
# count the number of missing values
policia_dt.countna()

Unnamed: 0_level_0,state,stop_date,stop_time,county_name,driver_gender,driver_race,violation_raw,violation,search_conducted,search_type,stop_outcome,is_arrested,stop_duration,drugs_related_stop,district
Unnamed: 0_level_1,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,0,0,91741,5205,5202,5202,5202,0,88434,5202,5202,5202,0,0


In [12]:
del policia_dt[:,['county_name', 'state']]

In [13]:
# glance
policia_dt

Unnamed: 0_level_0,stop_date,stop_time,driver_gender,driver_race,violation_raw,violation,search_conducted,search_type,stop_outcome,is_arrested,stop_duration,drugs_related_stop,district
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪,▪▪▪▪,▪▪▪▪,▪,▪▪▪▪,▪,▪▪▪▪
0,2005-01-04,12:55,M,White,Equipment/Inspection Violation,Equipment,0,,Citation,0,0-15 Min,0,Zone X4
1,2005-01-23,23:15,M,White,Speeding,Speeding,0,,Citation,0,0-15 Min,0,Zone K3
2,2005-02-17,04:15,M,White,Speeding,Speeding,0,,Citation,0,0-15 Min,0,Zone X4
3,2005-02-20,17:15,M,White,Call for Service,Other,0,,Arrest Driver,1,16-30 Min,0,Zone X1
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
91737,2015-12-31,21:59,F,White,Speeding,Speeding,0,,Citation,0,0-15 Min,0,Zone K3
91738,2015-12-31,22:04,M,White,Other Traffic Violation,Moving violation,0,,Citation,0,0-15 Min,0,Zone X3
91739,2015-12-31,22:09,F,Hispanic,Equipment/Inspection Violation,Equipment,0,,Warning,0,0-15 Min,0,Zone K3
91740,2015-12-31,22:47,M,White,Registration Violation,Registration/plates,0,,Citation,0,0-15 Min,0,Zone X4


In [14]:
policia_dt[:,count(),by(f.driver_gender)]

Unnamed: 0_level_0,driver_gender,count
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪▪▪▪▪
0,,5205
1,F,23774
2,M,62762


In [15]:
policia_tidy_dt = policia_dt[~dt.isna(f.driver_gender),:]

In [16]:
policia_tidy_dt[:,count(),by(f.violation)
               ][:,f[:].extend({'grand_tot':dt.sum(f.count)})
                ][:,f[:].extend({'prop':f.count/f.grand_tot})
                 ][:,f[:].remove(f.grand_tot),sort(-f.prop)
                  ]

Unnamed: 0_level_0,violation,count,prop
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,Speeding,48423,0.559571
1,Moving violation,16224,0.187483
2,Equipment,10921,0.126202
3,Other,4409,0.0509499
4,Registration/plates,3703,0.0427914
5,Seat belt,2856,0.0330036


In [17]:
# custom function to generate a summary report per a single group column
def py_dt_one_group_proportions_summary(DT,por):
    
    DT_summary = DT[:,dt.count(),by(f[por])
                   ][:,f[:].extend({'grand_tot':dt.sum(f.count)})
                    ][:,f[:].extend({'prop':f.count/f.grand_tot})
                     ][:,f[:].remove(f.grand_tot),dt.sort(-f.prop)
                      ]
    return DT_summary

In [18]:
py_dt_one_group_proportions_summary(policia_tidy_dt,'search_conducted')

Unnamed: 0_level_0,search_conducted,count,prop
Unnamed: 0_level_1,▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,83229,0.961785
1,1,3307,0.0382153


In [19]:
policia_tidy_dt[:,count(),by(f.driver_gender,f.violation)
               ][:,f[:].extend({'group_tot':dt.sum(f.count)}),by(f.driver_gender)
                ][:,f[:].extend({'prop':f.count/f.group_tot})
                 ][:,f[:].remove(f[1])]

Unnamed: 0_level_0,driver_gender,violation,count,group_tot,prop
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,F,Equipment,2501,23774,0.105199
1,F,Moving violation,3286,23774,0.138218
2,F,Other,707,23774,0.0297384
3,F,Registration/plates,1056,23774,0.0444183
4,F,Seat belt,578,23774,0.0243123
5,F,Speeding,15646,23774,0.658114
6,M,Equipment,8420,62762,0.134158
7,M,Moving violation,12938,62762,0.206144
8,M,Other,3702,62762,0.0589847
9,M,Registration/plates,2647,62762,0.0421752


In [20]:
# custom function to generate a summary report per two groups column
def py_dt_two_group_proportions_summary(DT,por1,por2):
    
    DT_summary = DT[:,dt.count(),by(f[por1],f[por2])
                   ][:,f[:].extend({'group_tot':dt.sum(f.count)}),by(f[por1])
                    ][:,f[:].extend({'prop':f.count/f.group_tot})
                     ][:,f[:].remove(f[1])
                      ]
    
    return DT_summary

In [21]:
py_dt_two_group_proportions_summary(policia_tidy_dt[f.violation=="Speeding",:],'driver_gender','stop_outcome')

Unnamed: 0_level_0,driver_gender,stop_outcome,count,group_tot,prop
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,F,Arrest Driver,90,15646,0.00575227
1,F,Arrest Passenger,10,15646,0.000639141
2,F,Citation,14898,15646,0.952192
3,F,N/D,15,15646,0.000958711
4,F,No Action,6,15646,0.000383485
5,F,Warning,627,15646,0.0400741
6,M,Arrest Driver,521,32777,0.0158953
7,M,Arrest Passenger,42,32777,0.00128139
8,M,Citation,30961,32777,0.944595
9,M,N/D,32,32777,0.000976294


In [22]:
py_dt_one_group_proportions_summary(policia_tidy_dt,'search_conducted')

Unnamed: 0_level_0,search_conducted,count,prop
Unnamed: 0_level_1,▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,83229,0.961785
1,1,3307,0.0382153


In [23]:
py_dt_two_group_proportions_summary(policia_tidy_dt,'driver_gender','search_conducted')

Unnamed: 0_level_0,driver_gender,search_conducted,count,group_tot,prop
Unnamed: 0_level_1,▪▪▪▪,▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,F,0,23318,23774,0.980819
1,F,1,456,23774,0.0191806
2,M,0,59911,62762,0.954574
3,M,1,2851,62762,0.0454256


In [24]:
py_dt_one_group_proportions_summary(policia_tidy_dt,'search_type')[~dt.isna(f.search_type),:]

Unnamed: 0_level_0,search_type,count,prop
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,Incident to Arrest,1290,0.0149071
1,Probable Cause,924,0.0106776
2,Inventory,219,0.00253074
3,Reasonable Suspicion,214,0.00247296
4,Protective Frisk,164,0.00189517
5,"Incident to Arrest,Inventory",123,0.00142137
6,"Incident to Arrest,Probable Cause",100,0.00115559
7,"Probable Cause,Reasonable Suspicion",54,0.000624018
8,"Incident to Arrest,Inventory,Probable Cause",35,0.000404456
9,"Probable Cause,Protective Frisk",35,0.000404456


In [25]:
policia_tidy_dt[dt.f.search_type.re_match("[\w\s\W]+Frisk"),:
               ][:,count(),by(f.driver_gender)]

Unnamed: 0_level_0,driver_gender,count
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪▪▪▪▪
0,F,30
1,M,246


In [26]:
py_dt_one_group_proportions_summary(policia_tidy_dt,'stop_duration')

Unnamed: 0_level_0,stop_duration,count,prop
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,0-15 Min,69577,0.804024
1,16-30 Min,13740,0.158778
2,30+ Min,3219,0.0371984


In [27]:
py_dt_two_group_proportions_summary(policia_tidy_dt,'stop_duration','is_arrested')

Unnamed: 0_level_0,stop_duration,is_arrested,count,group_tot,prop
Unnamed: 0_level_1,▪▪▪▪,▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,0-15 Min,0,68627,69577,0.986346
1,0-15 Min,1,950,69577,0.0136539
2,16-30 Min,0,12454,13740,0.906405
3,16-30 Min,1,1286,13740,0.0935953
4,30+ Min,0,2377,3219,0.738428
5,30+ Min,1,842,3219,0.261572


In [28]:
py_dt_two_group_proportions_summary(policia_tidy_dt,'driver_race','is_arrested')

Unnamed: 0_level_0,driver_race,is_arrested,count,group_tot,prop
Unnamed: 0_level_1,▪▪▪▪,▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,Asian,0,2343,2389,0.980745
1,Asian,1,46,2389,0.0192549
2,Black,0,11516,12285,0.937403
3,Black,1,769,12285,0.0625967
4,Hispanic,0,9122,9727,0.937802
5,Hispanic,1,605,9727,0.062198
6,Other,0,263,265,0.992453
7,Other,1,2,265,0.00754717
8,White,0,60214,61870,0.973234
9,White,1,1656,61870,0.0267658


In [29]:
# stop time
stop_time_df = policia_tidy_dt[:,(f.stop_time)].to_pandas()

In [30]:
# extracting hour
stop_time_hour = stop_time_df.stop_time.str.extract(r'([\d]{2})')

In [31]:
# a new dataframe
stop_time_hour_dt = dt.Frame(stop_time_hour)

In [32]:
# change a col name
stop_time_hour_dt.names={'0':'stop_hour'}

In [33]:
# Binding two dts
policia_tidy_dt_v1 = dt.cbind(policia_tidy_dt,stop_time_hour_dt)

In [34]:
# Hour wise arrests
hour_wise_arrests_dt = py_dt_two_group_proportions_summary(policia_tidy_dt_v1,'stop_hour','is_arrested')

In [35]:
# Visualization
alt.Chart(hour_wise_arrests_dt.to_pandas()).mark_bar().encode(
    alt.X('stop_hour:N'),
    alt.Y('count'),
    alt.Color('is_arrested')
).properties(

    title= 'Hour wise arrest trends'
)

In [36]:
# Hour wise arrest rates
hour_wise_arrests_rates_dt= hour_wise_arrests_dt[f.is_arrested==True,:
                                                ][:,dt.mean(f.count),by(f.stop_hour)
                                                 ]

In [37]:
# Visualization
alt.Chart(hour_wise_arrests_rates_dt.to_pandas()).mark_line().encode(
    alt.X('stop_hour'),
    alt.Y('count')
).properties(

    title = 'Hourly wise - average arrest rates'
)

In [38]:
py_dt_one_group_proportions_summary(policia_tidy_dt_v1,'drugs_related_stop')

Unnamed: 0_level_0,drugs_related_stop,count,prop
Unnamed: 0_level_1,▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,0,85674,0.990039
1,1,862,0.00996117


In [39]:
# stop date and converting to pandas frame
stop_date_df = policia_tidy_dt[:,(f.stop_date)].to_pandas()

In [40]:
# converting to date object
stop_date_df = stop_date_df.apply(lambda x: pd.to_datetime(x,format="%Y-%m-%d"))

In [41]:
# extracting year and months
stop_year_month_dt = dt.cbind(dt.Frame({'year':list(stop_date_df['stop_date'].dt.year)}),
                              dt.Frame({'month':list(stop_date_df['stop_date'].dt.month)})
                             )

In [42]:
# Joining two DTs
policia_tidy_dt_v2 = dt.cbind(policia_tidy_dt_v1,stop_year_month_dt
                             )[:,f[:].remove([f[0],f[1]])]

In [43]:
# Year wise counts drug related stops
policia_tidy_dt_v2[f.drugs_related_stop==True,:
                  ][:,count(),by(f.year)
                   ]

Unnamed: 0_level_0,year,count
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪▪▪▪▪
0,2005,16
1,2006,73
2,2007,72
3,2008,62
4,2009,71
5,2010,72
6,2011,75
7,2012,103
8,2013,99
9,2014,120


In [44]:
# Joining police and weather dataframes
policia_tidy_dt_v3 = policia_dt[:,:,join(weather_dt)]

In [45]:
weather_dt

stop_date,temp_avg,temp_min,temp_max
▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪
2005-01-01,44,35,53
2005-01-02,36,28,44
2005-01-03,49,44,53
2005-01-04,42,39,45
⋮,⋮,⋮,⋮
2015-12-28,40,30,44
2015-12-29,33,28,40
2015-12-30,30,27,35
2015-12-31,39,35,50


In [46]:
# Visualization
alt.Chart(weather_dt.to_pandas()).transform_fold(
    
    ['temp_avg','temp_min','temp_max'],
    as_=['temp_type','temp_val']
    
).mark_boxplot().encode(
    
    alt.Y('temp_type:O'),
    alt.X('temp_val:Q')
    
).properties(title='Weather temp distributions')

In [47]:
# Adding a new column temp_diff
weather_dt[:,update(temp_diff=f.temp_max-f.temp_min)]

In [48]:
# Visualiztion
alt.Chart(weather_dt[:,f.temp_diff].to_pandas()).mark_bar().encode(
    alt.X('temp_diff',bin=True),
    alt.Y('count()')
).properties(

    title='Distribution of temparature differences'
)

In [49]:
# Downloading weather data and selecting specific columns related to weather conditions
weather_temp = dt.fread('https://assets.datacamp.com/production/repositories/1497/datasets/02f3fb2d4416d3f6626e1117688e0386784e8e55/weather.csv',na_strings=[""]
                       )[:,[f[1],f[7:]]]

In [50]:
weather_temp

Unnamed: 0_level_0,DATE,WT01,WT02,WT03,WT04,WT05,WT06,WT07,WT08,WT09,…,WT17,WT18,WT19,WT21,WT22
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,Unnamed: 11_level_1,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪▪▪▪▪
0,2005-01-01,1,,,,,,,,,…,,,,,
1,2005-01-02,,,,,,,,,,…,,1,,,
2,2005-01-03,1,,,,,,,,,…,,,,,
3,2005-01-04,1,,,,,,,,,…,,,,,
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋱,⋮,⋮,⋮,⋮,⋮
4013,2015-12-28,,,,,,,,,,…,,,,,
4014,2015-12-29,1,,,1,,1,,,,…,,,,,
4015,2015-12-30,1,,,,,1,,,,…,,,,,
4016,2015-12-31,1,,,,,,,,,…,,,,,


In [51]:
# New column : sum of rows
weather_temp[:,update(tot_cond=dt.rowsum(f[1:]))]

In [52]:
# select few columms
weather_temp_1= weather_temp[:,[f[0],f[-1]]]

In [53]:
# renaming dataframe column
weather_temp_1.names = {'DATE':'stop_date'}

In [54]:
# apply a key
weather_temp_1.key="stop_date"

In [55]:
# Visualization
alt.Chart(weather_temp_1.to_pandas()).mark_bar().encode(
    alt.X('tot_cond',bin=True),
    alt.Y('count()')
).properties(
    title='Weather conditions distribution'
)

In [56]:
# Joining two dataframes
policia_tidy_dt_v4 = policia_tidy_dt_v3[:,:,join(weather_temp_1)]

In [57]:
# selecting first 5 and last 4 columns
policia_tidy_dt_v4[:,[f[:5],f[-4:]]]

Unnamed: 0_level_0,stop_date,stop_time,driver_gender,driver_race,violation_raw,temp_avg,temp_min,temp_max,tot_cond
Unnamed: 0_level_1,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪,▪▪▪▪,▪▪▪▪,▪▪▪▪▪▪▪▪
0,2005-01-04,12:55,M,White,Equipment/Inspection Violation,42,39,45,4
1,2005-01-23,23:15,M,White,Speeding,17,5,28,6
2,2005-02-17,04:15,M,White,Speeding,34,29,39,0
3,2005-02-20,17:15,M,White,Call for Service,26,18,34,1
⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮,⋮
91737,2015-12-31,21:59,F,White,Speeding,39,35,50,1
91738,2015-12-31,22:04,M,White,Other Traffic Violation,39,35,50,1
91739,2015-12-31,22:09,F,Hispanic,Equipment/Inspection Violation,39,35,50,1
91740,2015-12-31,22:47,M,White,Registration Violation,39,35,50,1


In [58]:
# Speed violation 
policia_zone_speed_violations_weather = py_dt_two_group_proportions_summary(policia_tidy_dt_v4[f.violation=="Speeding",:],'district','tot_cond')

In [59]:
# Visualization
alt.Chart(policia_zone_speed_violations_weather.to_pandas()).mark_bar().encode(
    alt.Y('district'),
    alt.X('count'),
    alt.Color('tot_cond')
).properties(

    title='Speed Violations - Disticts and weather conditions'
)