# Base Analysis

### Setup

#### Load Libraries

In [13]:
library(tidyverse)
library(lubridate)


Attaching package: ‘lubridate’

The following object is masked from ‘package:base’:

    date



#### Parameters

In [62]:
## First save csv files into a folder called data within your working directory

census_path <- "data/ACS_15_5YR_B02001.csv.zip"
nd_path <- "data/nd_statewide_2019_08_13.csv.zip"

#### Load Data

In [103]:
census_data <-
    census_path %>%
    read_csv(skip = 1) %>%
    select(
        geography = Geography, 
        total_pop = `Estimate; Total:`, 
        total_na = `Estimate; Total: - American Indian and Alaska Native alone`
    ) %>%
    separate(geography, c("county", "state"), sep = ", ", remove = FALSE) %>%
    mutate(
        na_pop_prop = (total_na/total_pop) * 100,
        state = state %>% str_to_lower(),
        county = county %>% str_to_lower()
    )

nd_data <-
    nd_path %>%
    read_csv() %>%
    rename(county = "county_name") %>%
    mutate(
        state = "north dakota",
        county = county %>% str_to_lower()
    ) %>%
    left_join(census_data, by = c("state", "county"))

Multiple files in zip: reading 'ACS_15_5YR_B02001.csv'
Parsed with column specification:
cols(
  .default = col_double(),
  Id = [31mcol_character()[39m,
  Id2 = [31mcol_character()[39m,
  Geography = [31mcol_character()[39m
)
See spec(...) for full column specifications.
Multiple files in zip: reading 'nd_statewide_2019_08_13.csv'
Parsed with column specification:
cols(
  raw_row_number = [31mcol_character()[39m,
  date = [34mcol_date(format = "")[39m,
  time = [34mcol_time(format = "")[39m,
  location = [31mcol_character()[39m,
  county_name = [31mcol_character()[39m,
  subject_age = [32mcol_double()[39m,
  subject_race = [31mcol_character()[39m,
  subject_sex = [31mcol_character()[39m,
  type = [31mcol_character()[39m,
  violation = [31mcol_character()[39m,
  outcome = [31mcol_character()[39m,
  raw_Race = [31mcol_character()[39m
)


### Census Data Exploration

In [102]:
## Proportions are shown as percentages (ie 4.9 = 4.9%) to avoid scientific notation

census_data %>%
    filter(geography %>% str_detect("North Dakota"))

geography,county,state,total_pop,total_na,na_pop_prop
<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
"Adams County, North Dakota",Adams County,North Dakota,2341,63,2.6911576
"Barnes County, North Dakota",Barnes County,North Dakota,11097,104,0.9371902
"Benson County, North Dakota",Benson County,North Dakota,6794,3735,54.9749779
"Billings County, North Dakota",Billings County,North Dakota,969,5,0.5159959
"Bottineau County, North Dakota",Bottineau County,North Dakota,6634,164,2.4721134
"Bowman County, North Dakota",Bowman County,North Dakota,3221,38,1.1797578
"Burke County, North Dakota",Burke County,North Dakota,2208,26,1.1775362
"Burleigh County, North Dakota",Burleigh County,North Dakota,88223,3345,3.7915283
"Cass County, North Dakota",Cass County,North Dakota,162500,1898,1.168
"Cavalier County, North Dakota",Cavalier County,North Dakota,3890,45,1.1568123


### North Dakota Analysis

In [104]:
nd_data %>%
    head()

raw_row_number,date,time,location,county,subject_age,subject_race,subject_sex,type,violation,outcome,raw_Race,state,geography,total_pop,total_na,na_pop_prop
<chr>,<date>,<drtn>,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
91444,2011-05-04,09:58:00,"94, 291",barnes county,101,white,male,vehicular,390902: Exceeded speed limit,citation,White,north dakota,"Barnes County, North Dakota",11097,104,0.9371902
138130,2011-12-28,11:41:00,gateway at 55th st,grand forks county,101,white,male,vehicular,391022: Failed to yield at intersection,citation,White,north dakota,"Grand Forks County, North Dakota",68979,1789,2.593543
118604,2011-09-16,19:03:00,"29, 104",traill county,102,white,male,vehicular,390902: Exceeded speed limit,citation,White,north dakota,"Traill County, North Dakota",8077,88,1.0895134
74973,2011-01-28,16:46:00,"200, 88",dunn county,11,black,male,vehicular,390902: Exceeded speed limit,citation,African American,north dakota,"Dunn County, North Dakota",4195,458,10.9177592
57595,2010-10-15,10:46:00,"29, 127",grand forks county,11,white,male,vehicular,390902: Exceeded speed limit,citation,White,north dakota,"Grand Forks County, North Dakota",68979,1789,2.593543
149505,2012-02-21,19:17:00,"85, 174",mckenzie county,11,white,male,vehicular,390902: Exceeded speed limit,citation,White,north dakota,"McKenzie County, North Dakota",9615,1709,17.774311


In [38]:
## What is the distribution of stops across each factor (ie county, violation, sex, etc)?

nd_data %>%
    count(raw_Race) %>%
    mutate(prop_of_stops = n/sum(n)) %>%
    arrange(desc(prop_of_stops))

## Native American stops seem to be slightly underrepresented at the state level, 
## based on fact that Native Americans make up ~5% of the total ND population. Will pull in census data later.

nd_data %>%
    count(county_name) %>%
    mutate(prop_of_stops = n/sum(n)) %>%
    arrange(desc(prop_of_stops))

nd_data %>%
    count(subject_sex) %>%
    mutate(prop_of_stops = n/sum(n)) %>%
    arrange(desc(prop_of_stops))

nd_data %>%
    count(type) %>%
    mutate(prop_of_stops = n/sum(n)) %>%
    arrange(desc(prop_of_stops))

nd_data %>%
    count(violation) %>%
    mutate(prop_of_stops = n/sum(n)) %>%
    arrange(desc(prop_of_stops))

nd_data %>%
    count(outcome) %>%
    mutate(prop_of_stops = n/sum(n)) %>%
    arrange(desc(prop_of_stops))

raw_Race,n,prop_of_stops
<chr>,<int>,<dbl>
White,291682,0.8835314359
Native American,13555,0.0410593338
African American,9562,0.0289641719
Hispanic,8713,0.0263924733
Other,4756,0.0144063587
Asian,1691,0.0051221935
,173,0.0005240328


county_name,n,prop_of_stops
<chr>,<int>,<dbl>
Cass County,40211,0.1218028
Ward County,32371,0.09805472
Grand Forks County,31219,0.0945652
Williams County,20328,0.06157537
Ramsey County,17096,0.05178535
Stark County,16373,0.04959531
Morton County,14808,0.04485479
Stutsman County,14450,0.04377037
Burleigh County,14176,0.0429404
Mckenzie County,10820,0.03277477


subject_sex,n,prop_of_stops
<chr>,<int>,<dbl>
male,242904,0.7357784159
female,87137,0.2639459368
,91,0.0002756473


type,n,prop_of_stops
<chr>,<int>,<dbl>
vehicular,327573,0.992248555
,2559,0.007751445


violation,n,prop_of_stops
<chr>,<int>,<dbl>
390902: Exceeded speed limit,171436,0.519295312
3921414: No seat belt,22670,0.068669502
390902: Exceeded speed limit|3921414: No seat belt,12436,0.037669781
3904371: Failed to register motor vehicle upon gainful employment,11686,0.035397962
391044: Disregarded stop sign,11486,0.034792144
3921394: Vehicle having tinted windshield,7330,0.022203240
3921463: Commercial Motor Vehicle Violations,4612,0.013970170
390801: Drove or in actual physical control of a motor vehicle while under the influence of alcohol or drugs and/or with AC of .08 or greater and/or,4576,0.013861122
3909011: Care required,3862,0.011698351
390411: Failed to display number plates/tabs,3775,0.011434820


outcome,n,prop_of_stops
<chr>,<int>,<dbl>
citation,330132,1


In [31]:
nd_na_stops_data <-
    nd_data %>%
    filter(raw_Race == "Native American")

In [33]:
## What is the distribution of Native American stops across relevant factors (ie county, sex, type, violation, etc)

nd_na_stops_data %>%
    count(county_name) %>%
    mutate(prop_of_stops = n/sum(n)) %>%
    arrange(desc(prop_of_stops))

nd_na_stops_data %>%
    count(subject_sex) %>%
    mutate(prop_of_stops = n/sum(n)) %>%
    arrange(desc(prop_of_stops))

nd_na_stops_data %>%
    count(type) %>%
    mutate(prop_of_stops = n/sum(n)) %>%
    arrange(desc(prop_of_stops))

nd_na_stops_data %>%
    count(violation) %>%
    mutate(prop_of_stops = n/sum(n)) %>%
    arrange(desc(prop_of_stops))

nd_na_stops_data %>%
    count(outcome) %>%
    mutate(prop_of_stops = n/sum(n)) %>%
    arrange(desc(prop_of_stops))

county_name,n,prop_of_stops
<chr>,<int>,<dbl>
Rolette County,2733,0.2016230173
Ramsey County,2305,0.1700479528
Ward County,1375,0.1014385835
Morton County,1127,0.0831427518
Mclean County,755,0.0556990041
Grand Forks County,687,0.050682405
Burleigh County,582,0.0429361859
Bottineau County,488,0.0360014755
Cass County,364,0.0268535596
Pierce County,361,0.026632239


subject_sex,n,prop_of_stops
<chr>,<int>,<dbl>
male,7997,0.5899668
female,5558,0.4100332


type,n,prop_of_stops
<chr>,<int>,<dbl>
vehicular,13297,0.98096643
,258,0.01903357


violation,n,prop_of_stops
<chr>,<int>,<dbl>
390902: Exceeded speed limit,5350,0.394688307
3921414: No seat belt,804,0.059313906
3904371: Failed to register motor vehicle upon gainful employment,519,0.038288454
390642: Drove while license suspended(4 or more offenses),454,0.033493176
390902: Exceeded speed limit|3921414: No seat belt,452,0.033345629
3921394: Vehicle having tinted windshield,450,0.033198082
390601: Drove without\expired operators license,235,0.017336776
390820: Driving without Liability Insurance,227,0.016746588
390642: Drove while license suspended(4 or more offenses)|390820: Driving without Liability Insurance,208,0.015344891
391044: Disregarded stop sign,200,0.014754703


outcome,n,prop_of_stops
<chr>,<int>,<dbl>
citation,13555,1


#### Glancing at disproportionate stops with census data

In [114]:
nd_data %>% head()

raw_row_number,date,time,location,county,subject_age,subject_race,subject_sex,type,violation,outcome,raw_Race,state,geography,total_pop,total_na,na_pop_prop
<chr>,<date>,<drtn>,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
91444,2011-05-04,09:58:00,"94, 291",barnes county,101,white,male,vehicular,390902: Exceeded speed limit,citation,White,north dakota,"Barnes County, North Dakota",11097,104,0.9371902
138130,2011-12-28,11:41:00,gateway at 55th st,grand forks county,101,white,male,vehicular,391022: Failed to yield at intersection,citation,White,north dakota,"Grand Forks County, North Dakota",68979,1789,2.593543
118604,2011-09-16,19:03:00,"29, 104",traill county,102,white,male,vehicular,390902: Exceeded speed limit,citation,White,north dakota,"Traill County, North Dakota",8077,88,1.0895134
74973,2011-01-28,16:46:00,"200, 88",dunn county,11,black,male,vehicular,390902: Exceeded speed limit,citation,African American,north dakota,"Dunn County, North Dakota",4195,458,10.9177592
57595,2010-10-15,10:46:00,"29, 127",grand forks county,11,white,male,vehicular,390902: Exceeded speed limit,citation,White,north dakota,"Grand Forks County, North Dakota",68979,1789,2.593543
149505,2012-02-21,19:17:00,"85, 174",mckenzie county,11,white,male,vehicular,390902: Exceeded speed limit,citation,White,north dakota,"McKenzie County, North Dakota",9615,1709,17.774311


In [125]:
## 

nd_data %>%
    group_by(geography, na_pop_prop, raw_Race) %>%
    summarize(
        total_num_stops = n()
    ) %>%
    mutate(prop_of_stops = (total_num_stops/sum(total_num_stops)) * 100) %>%
    filter(raw_Race == "Native American") %>%
    mutate(disprop_stops = prop_of_stops - na_pop_prop) %>%
    arrange(desc(disprop_stops))

geography,na_pop_prop,raw_Race,total_num_stops,prop_of_stops,disprop_stops
<chr>,<dbl>,<chr>,<int>,<dbl>,<dbl>
"Towner County, North Dakota",2.136007,Native American,286,23.0088496,20.87284258
"Pierce County, North Dakota",1.8657565,Native American,361,14.9111937,13.04543718
"Bottineau County, North Dakota",2.4721134,Native American,488,10.5650574,8.09294402
"McHenry County, North Dakota",0.6532577,Native American,238,6.8371158,6.18385808
"Nelson County, North Dakota",1.312336,Native American,261,6.2906725,4.97833649
"Morton County, North Dakota",3.6329136,Native American,1127,7.6107509,3.97783737
"Ramsey County, North Dakota",9.8132457,Native American,2305,13.482686,3.66944029
"Ward County, North Dakota",1.3360694,Native American,1375,4.2476291,2.9115596
"Sheridan County, North Dakota",0.4487659,Native American,7,3.030303,2.58153714
"Eddy County, North Dakota",4.1772152,Native American,22,5.9782609,1.80104568


In [126]:
nd_data %>%
    group_by(geography, na_pop_prop, raw_Race) %>%
    summarize(
        total_num_stops = n()
    ) %>%
    mutate(prop_of_stops = (total_num_stops/sum(total_num_stops)) * 100) %>%
    filter(raw_Race == "Native American") %>%
    mutate(disprop_stops = prop_of_stops - na_pop_prop) %>%
    arrange(desc(na_pop_prop))

geography,na_pop_prop,raw_Race,total_num_stops,prop_of_stops,disprop_stops
<chr>,<dbl>,<chr>,<int>,<dbl>,<dbl>
"Sioux County, North Dakota",81.803653,Native American,2,9.5238095,-72.27984344
"Rolette County, North Dakota",77.3210098,Native American,2733,65.0559391,-12.26507073
"Benson County, North Dakota",54.9749779,Native American,166,6.5225933,-48.4523846
"Mountrail County, North Dakota",28.3799849,Native American,103,2.0,-26.37998487
"McKenzie County, North Dakota",17.774311,Native American,132,1.219963,-16.55434794
"Dunn County, North Dakota",10.9177592,Native American,170,3.5617012,-7.356058
"Ramsey County, North Dakota",9.8132457,Native American,2305,13.482686,3.66944029
"McLean County, North Dakota",7.1844249,Native American,755,7.5841286,0.39970365
"Oliver County, North Dakota",4.2880704,Native American,24,1.8348624,-2.45320798
"Eddy County, North Dakota",4.1772152,Native American,22,5.9782609,1.80104568


In [129]:
nd_data %>%
    filter(county == "sioux county")

raw_row_number,date,time,location,county,subject_age,subject_race,subject_sex,type,violation,outcome,raw_Race,state,geography,total_pop,total_na,na_pop_prop
<chr>,<date>,<drtn>,<chr>,<chr>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>
274195,2013-10-26,02:30:00,"24, 42.8",sioux county,22,white,male,vehicular,390804: Leaving the scene of accid. involving injury/death,citation,White,north dakota,"Sioux County, North Dakota",4380,3583,81.80365
284616|284617,2013-12-22,13:35:00,"24, 42.8",sioux county,22,white,male,vehicular,3909011: Care required|390809: Failure to give notice of reportable accident,citation,White,north dakota,"Sioux County, North Dakota",4380,3583,81.80365
230638,2013-04-01,15:50:00,"810, 1.5, At Bismarck",sioux county,28,other,female,,1211103: False information to a police officer,citation,Native American,north dakota,"Sioux County, North Dakota",4380,3583,81.80365
316800|316801,2014-06-13,15:12:00,"Cannonball road, 1806, 32, Junction",sioux county,28,white,male,vehicular,390642: Drove while license suspended(4 or more offenses)|390820: Driving without Liability Insurance,citation,White,north dakota,"Sioux County, North Dakota",4380,3583,81.80365
95471,2011-05-25,14:19:00,"49, 2",sioux county,28,white,male,vehicular,390902: Exceeded speed limit,citation,White,north dakota,"Sioux County, North Dakota",4380,3583,81.80365
395338|395339,2015-06-22,12:14:00,"6, 25",sioux county,30,white,male,vehicular,3921414: No seat belt|390902: Exceeded speed limit,citation,White,north dakota,"Sioux County, North Dakota",4380,3583,81.80365
230639,2013-04-01,15:50:00,Residence in Shields,sioux county,32,other,female,,1211103: False information to a police officer,citation,Native American,north dakota,"Sioux County, North Dakota",4380,3583,81.80365
37475,2010-07-18,16:42:00,"24, 32",sioux county,38,white,male,vehicular,390902: Exceeded speed limit,citation,White,north dakota,"Sioux County, North Dakota",4380,3583,81.80365
139262,2012-01-04,10:14:00,"24, 30",sioux county,39,white,male,vehicular,3921414: No seat belt,citation,White,north dakota,"Sioux County, North Dakota",4380,3583,81.80365
113888,2011-08-23,21:33:00,"1806, 32",sioux county,43,white,male,vehicular,3910334: Pedestrian under influence creating hazard,citation,White,north dakota,"Sioux County, North Dakota",4380,3583,81.80365


In [130]:
nd_data %>%
    group_by(geography, na_pop_prop, raw_Race) %>%
    summarize(
        total_num_stops = n()
    ) %>%
    mutate(prop_of_stops = (total_num_stops/sum(total_num_stops)) * 100) %>%
    filter(raw_Race == "Native American") %>%
    mutate(disprop_stops = prop_of_stops - na_pop_prop) %>%
    arrange(desc(total_num_stops))

geography,na_pop_prop,raw_Race,total_num_stops,prop_of_stops,disprop_stops
<chr>,<dbl>,<chr>,<int>,<dbl>,<dbl>
"Rolette County, North Dakota",77.3210098,Native American,2733,65.0559391,-12.26507073
"Ramsey County, North Dakota",9.8132457,Native American,2305,13.482686,3.66944029
"Ward County, North Dakota",1.3360694,Native American,1375,4.2476291,2.9115596
"Morton County, North Dakota",3.6329136,Native American,1127,7.6107509,3.97783737
"McLean County, North Dakota",7.1844249,Native American,755,7.5841286,0.39970365
"Grand Forks County, North Dakota",2.593543,Native American,687,2.200583,-0.39295998
"Burleigh County, North Dakota",3.7915283,Native American,582,4.1055305,0.31400219
"Bottineau County, North Dakota",2.4721134,Native American,488,10.5650574,8.09294402
"Cass County, North Dakota",1.168,Native American,364,0.9052249,-0.26277506
"Pierce County, North Dakota",1.8657565,Native American,361,14.9111937,13.04543718
