This repository has been archived by the owner on Dec 25, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
get.R
195 lines (154 loc) · 5.62 KB
/
get.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
#' Download Agency Data
#'
#' @description Download a single agency's data from the Missouri Attorney
#' General's Office.
#'
#' @details The statistics that can be downloaded for each agency are as follows:
#'
#' \describe{
#' \item{\code{"Disparity"}}{}
#' \item{\code{"Stops"}}{}
#' \item{\code{"Searches"}}{}
#' \item{\code{"Arrests"}}{}
#' \item{\code{"Contraband"}}{}
#' }
#'
#' @return A tibble in 'long' format containing vehicle stop statistics for the
#' requested agency. Data may need additional formatting - see \code{\link{mv_reformat}}.
#'
#' @param browser Name of a browser binding created with \code{RSelenium}
#' @param agency An agency name (for St. Louis City and County agencies) or id number
#' (for agencies statewide).
#' @param statistic The statistic to download
#' @param pause Number of seconds to pause while pages load - adjust as
#' needed depending on internet connection
#' @param add_agency Append agency name to returned data; set to \code{FALSE} if you
#' are downloading data outside of St. Louis City or County
#'
#' @importFrom dplyr mutate rename
#' @importFrom rvest html_nodes html_table
#' @importFrom tidyr gather
#' @importFrom xml2 read_html
#'
#' @export
mv_get_agency <- function(browser, agency, statistic, pause = 3, add_agency = TRUE){
# global bindings
`2000` = year = NULL
# get agency id
if (is.numeric(agency) == FALSE){
# return id
ag <- mv_agency_id(agency = agency)
# ensure id is not 'NULL'
if (is.null(ag) == TRUE){
stop("Agency name not found. Use the 'agency' object to find valid names, or provide an id number.")
}
} else if (is.numeric(agency) == TRUE){
ag <- agency
}
# navigate browser
browser$navigate(paste0("https://ago.mo.gov/home/vehicle-stops-report?lea=", ag))
# pause
Sys.sleep(pause)
# menu option
if (statistic == "Disparity" | statistic == "Disparity Index"){
option <- browser$findElement(using = 'xpath', "//*/option[@value = 'disparityIndex']")
} else if (statistic == "Stops" | statistic == "Stop Rate"){
option <- browser$findElement(using = 'xpath', "//*/option[@value = 'totalStops']")
} else if (statistic == "Searches" | statistic == "Search Rate"){
option <- browser$findElement(using = 'xpath', "//*/option[@value = 'totalStopsSearches']")
} else if (statistic == "Arrests" | statistic == "Arrest Rate"){
option <- browser$findElement(using = 'xpath', "//*/option[@value = 'driversArrested']")
} else if (statistic == "Contraband" | statistic == "Contraband Hit Rate" | statistic == "Hit Rate"){
option <- browser$findElement(using = 'xpath', "//*/option[@value = 'totalStopsDiscovery']")
}
# select menu item
option$clickElement()
# pause
Sys.sleep(pause)
# download page
pg <- xml2::read_html(browser$getPageSource()[[1]])
# get table
tables <- rvest::html_nodes(pg, "table")
tables <- rvest::html_table(tables, fill = TRUE)
df <- tables[[1]]
# get name of last column
x <- rev(names(df))[1]
# pivot to long
out <- tidyr::gather(df, key = "year", value = "value", `2000`:x)
# tidy
out <- dplyr::rename(out, cat = "")
out <- dplyr::mutate(out, cat = ifelse(cat == "Native American", "Native", cat))
out <- dplyr::mutate(out, cat = ifelse(cat == "Totals", "Total", cat))
out <- dplyr::arrange(out, cat)
out <- dplyr::mutate(out, year = as.numeric(year))
# optionally add agency name
if (add_agency == TRUE){
# get name
name <- mv_agency_name(agency = ag)
# add name
out <- dplyr::mutate(out, agency = name)
out <- dplyr::select(out, agency, dplyr::everything())
}
# convert to tibble
out <- dplyr::as_tibble(out)
# return output
return(out)
}
# return agency id number
mv_agency_id <- function(agency){
# global bindings
agencies = name = NULL
# load data
data <- agencies
# subset
data <- dplyr::filter(data, name == agency)
# return output
if (nrow(data) == 1){
out <- data[[1]]
} else if (nrow(data) == 0){
out <- NULL
}
}
# return agency name
mv_agency_name <- function(agency){
# global bindings
agencies = id = NULL
# subset
data <- dplyr::filter(movsr::agencies, id == agency)
# return id
out <- data[[2]]
}
#' Batch Agency Downloads
#'
#' @description Designed to be including in a \code{purrr} call. This wraps
#' \code{\link{mv_get_agency}}, \code{\link{mv_reformat}}, and \code{\link{mv_filter}}.
#'
#' @usage mv_batch_agency(browser, agency, statistic, format, category, year, pause = 3)
#'
#' @return A tibble with the formatted and subset data.
#'
#' @param browser Name of a browser binding created with \code{RSelenium}
#' @param agency An agency name (for St. Louis City and County agencies) or id number
#' (for agencies statewide).
#' @param statistic The statistic to download (see \code{\link{mv_get_agency}})
#' @param format Style to reformat raw data (see \code{\link{mv_reformat}})
#' @param category Category to extract (see \code{\link{mv_filter}})
#' @param year Year to extract (see \code{\link{mv_filter}})
#' @param pause Number of seconds to pause while pages load - adjust as
#' needed depending on internet connection
#'
#' @export
mv_batch_agency <- function(browser, agency, statistic, format, category, year, pause = 3){
# issues with year argument
x <- as.character(year)
# pull data
data <- mv_get_agency(browser = browser, agency = agency, statistic = statistic, pause = pause)
# reformat
if (statistic == "Stops" & format != "count"){
formatted_data <- mv_reformat(data, statistic = statistic, format = format)
}
# subset
out <- mv_filter(data, category = category, year = x)
# return output
return(out)
}