This repository has been archived by the owner on Dec 26, 2023. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 3
/
collapse.R
219 lines (195 loc) · 7.72 KB
/
collapse.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
#' Collapse Months in Year List Object into Single Tibble
#'
#' @description This function takes a year-list containing individual tibbles - one per
#' month - that have been validated and collapses them into a single tibble.
#'
#' @details \code{cs_collapse} applies common sense variable classes to a number
#' of variables. This is motivated by issues that originate with SLMPD \code{.csv}
#' files. When they are imported, the \pkg{readr} package sometimes applies
#' the incorrect variable classes because of formatting issues in the tables.
#' Since the tables have inconsistent variable names and numbers of variables,
#' all variables are imported as \code{chr} data. During \code{cs_collapse}'s
#' execution, the following changes are made:
#'
#' \describe{
#' \item{Count}{Converted to \code{int}}
#' \item{Crime}{Converted to \code{int}}
#' \item{District}{Converted to \code{int}}
#' \item{ILEADSAddress}{Converted to \code{int}}
#' \item{Neighborhood}{Converted to \code{int}}
#' \item{CADAddress}{Converted to \code{int}}
#' \item{XCoord}{Converted to \code{dbl}}
#' \item{YCoord}{Converted to \code{dbl}}
#' }
#'
#' @usage cs_collapse(.data)
#'
#' @param .data A list containing monthly crime data
#'
#' @return A tibble containing all crime data in a given year-list object.
#'
#' @examples
#' # load example year-list object
#' load(system.file("testdata", "yearList17.rda", package = "compstatr", mustWork = TRUE))
#'
#' # validate
#' cs_validate(yearList17, year = 2017)
#'
#' # standaridze May, which has 26 variables
#' yearList17 <- cs_standardize(yearList17, month = "May", config = 26)
#'
#' # validate again to confirm fix
#' cs_validate(yearList17, year = 2017)
#'
#' # collapse now that the data are valid
#' crimeReports17 <- cs_collapse(yearList17)
#'
#' @importFrom dplyr %>%
#' @importFrom dplyr arrange
#' @importFrom dplyr bind_rows
#' @importFrom dplyr mutate
#' @importFrom purrr map
#'
#' @export
cs_collapse <- function(.data){
# undefined global variables
coded_month = date_occur = cad_address = count = crime = district = ileads_address = neighborhood = x_coord = y_coord = NULL
# check for missing parameters
if (missing(.data)) {
stop('A existing year-list object must be specified for .data.')
}
# extract each month, collapse, and re-order
purrr::map(.data, dplyr::as_tibble) %>%
dplyr::bind_rows() %>%
dplyr::arrange(coded_month, date_occur) -> out
# reformat variables
out %>%
dplyr::mutate(
count = as.integer(count),
crime = as.integer(crime),
district = as.integer(district),
ileads_address = suppressWarnings(as.numeric(ileads_address)),
neighborhood = as.integer(neighborhood),
cad_address = suppressWarnings(as.numeric(cad_address)),
x_coord = as.numeric(x_coord),
y_coord = as.numeric(y_coord)
) -> out
# return combined object
return(out)
}
#' Ensure Objects Contain Data Only For a Given Year
#'
#' @description Since crimes are sometimes reported well after they are committed, objects created
#' with \code{cs_collapse} often contain crimes that occurred in prior years. The \code{cs_combine}
#' function ensures that objects contain only data for a given year, with the ability to add in
#' crimes reported for the given year in later years.
#'
#' @details When applied to a single year's worth of data, \code{cs_combine} will subset out
#' any crimes that occured in a year other than the one given for the \code{date} argument.
#'
#' When applied to a range of objects, such as objects for 2017 and 2018, each object will be
#' subset to identify crimes that occured in the year given for the \code{date} argument. This
#' creates a more complete accounting of crime in a given year since it adds in crimes reported
#' in subsequent years to the object. At the same time, crimes that occured prior to the given
#' year will also be subset out to ensure the resulting object only contains crimes
#' that occured in that given year.
#'
#' @usage cs_combine(type = "year", date, ...)
#'
#' @param type "year" is the only valid input currently; year to date functionality
#' is planned for a later update
#' @param date For \code{type = "year"}, this should be the year of data to be returned.
#' For \code{type = "ytd"}, this should be the last month to be included in each estimate.
#' @param ... An unquoted list of objects
#'
#' @return A tibble containing a selection of combined crime data for a given time period.
#'
#' @importFrom dplyr %>%
#' @importFrom dplyr arrange
#' @importFrom dplyr bind_rows
#' @importFrom dplyr everything
#' @importFrom dplyr filter
#' @importFrom dplyr is.tbl
#' @importFrom dplyr select
#' @importFrom lubridate year
#' @importFrom purrr map_lgl
#' @importFrom rlang is_scalar_character
#' @importFrom rlang list2
#'
#' @examples
#' # load example year-list objects
#' load(system.file("testdata", "yearList17.rda", package = "compstatr", mustWork = TRUE))
#' load(system.file("testdata", "yearList18.rda", package = "compstatr", mustWork = TRUE))
#'
#' # validate
#' cs_validate(yearList17, year = 2017)
#' cs_validate(yearList18, year = 2018)
#'
#' # standaridze May for the 2017 object, which has 26 variables
#' yearList17 <- cs_standardize(yearList17, month = "May", config = 26)
#'
#' # validate again to confirm fix
#' cs_validate(yearList17, year = 2017)
#'
#' # collapse now that the data are valid
#' crimeReports17 <- cs_collapse(yearList17)
#' crimeReports18 <- cs_collapse(yearList18)
#'
#' # combine to add all sample 2017 crimes reported in 2018 to a single 2017 object
#' # and remove from our 2017 object all sample crimes reported in 2017 that occured prior
#' # to that year
#' crime17 <- cs_combine(type = "year", date = 2017, crimeReports17, crimeReports18)
#'
#' @export
cs_combine <- function(type = "year", date, ...){
# global bindings
date_occur = time = dateTime = cs_year = ...date = ...time = NULL
# check missing parameters
if (dplyr::is.tbl(type) == TRUE){
stop("A timeframe must be given for 'type'. At this time, only 'year' is a valid argument.")
}
if (dplyr::is.tbl(date) == FALSE){
if (class(date) != "numeric"){
stop("An integer year must be given for 'date'.")
}
} else if (dplyr::is.tbl(date) == TRUE){
stop("An integer year must be given for 'date'.")
}
# capture dots
list <- rlang::list2(...)
# check for missing tibble
if (length(list) == 0){
stop("At least one tibble must be supplied after the 'type' and 'date' arguments.")
}
# check dots
list %>%
purrr::map_lgl(.f = dplyr::is.tbl) -> tblCheck
# evaluate result
if (all(tblCheck) == FALSE){
stop("Only tibbles containing collapsed objects may be passed through the dots as arguments.")
}
# check for incorrect parameters
if (rlang::is_scalar_character(type) == FALSE){
stop("The output type must be a character scalar. At this time, only 'year' is a valid argument.")
} # Select one of 'year' or 'ytd'.
if (type %in% c("year", "ytd") == FALSE){
stop("The output type must be a character scalar. At this time, only 'year' is a valid argument.")
}
# combine
if (type == "year"){
# combine listed objects
results <- dplyr::bind_rows(...)
# create temporary date varible, filter based on supplied year, then arrange
results %>%
cs_parse_date(var = date_occur, dateVar = ...date, timeVar = ...time, keepDateTime = TRUE) %>%
dplyr::filter(year(...date) == date) %>%
dplyr::arrange(dateTime) %>%
dplyr::mutate(cs_year = lubridate::year(dateTime)) %>%
dplyr::select(-...date, -...time, -dateTime) %>%
dplyr::select(cs_year, dplyr::everything()) -> results
} else if (type == "ytd"){
stop("This functionality is still in progress.")
}
# return output
return(results)
}