-
Notifications
You must be signed in to change notification settings - Fork 0
/
recode_religion.R
249 lines (202 loc) · 10.2 KB
/
recode_religion.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
#' Recode Religion from GSS.
#'
#' Function recodes religious identification from Genearl Social Survey,
#' based on three variables: \code{relig}, \code{denom}, and \code{other}.
#' It can successfully recode either respondent's, or any other religious
#' identification which is determened by coresponding three variables.
#'
#' \code{recode_religion} uses schema developed by Darren E. Sherkat and
#' Derek Lehman in \href{https://iranianredneck.wordpress.com/2016/11/29/why-reltrad-sucks-contesting-the-measure-of-american-religion/}{"After
#' The Resurrection: The Field of the Sociology of Religion in the United
#' States"}, and is effectievly translation of that SPSS syntax (the bare
#' bone function for recoding is \link{fct_rec_relig}), with
#' additional functionality.
#'
#' Namley, it can handle both punches and labels
#' at the same time (but in different variables), which is important since
#' punches are not consequtive as indexes. In addition, function checks that
#' variables are adequate (i.e. that all values are in codebook) and of same
#' length, and also handles missing values: (1) if supplied through values,
#' provides detail recoding; (2) if \code{NA}, lumps them together in final
#' variable but uses them correctly in the recoding. Through passed
#' arguments, one can:
#' \enumerate{
#' \item Return longer version, of 12 religious identifications.
#' \item Add identifications from schema that are not present in
#' sample as empty levels.
#' \item Suppress printing of the frequencies of newly recoded variable.
#' \item Print unique key of the values that were recoded.
#' \item Return values as numerical factor, in which case the codebook
#' for new variable will be printed.
#' }
#' If \code{frequencies} is passed as \code{FALSE}, and numerical vector
#' is not requested as return value, all other information, such as
#' treatment of missing values, are provided as messages that can be
#' suppressed.
#'
#' More details can be found on \href{https://github.mdjeric}{github.mdjeric}.
#'
#' @param relig,denom,other Numerical, character, or factor, all of same
#' length and with coresponding punches or labels in codebook.
#' @param n_groups Number, 7 (default) or 12 of new religious identifications.
#' @param add_missing_levels Logical, to include as empty levels religious
#' identifications that may not be present in specific sample, but are
#' part of recoding schema.
#' @param frequencies Logical, to print frequency and percent table of
#' recoded religius identification (default is \code{TRUE}).
#' @param print_key Logical, to print the all unique tetrads of recoded
#' variables, i.e. a recoding key.
#' @param return_num Logical, to return numerical factor and print codebook.
#'
#' @return Vector with recoded religion from \code{relig}, \code{denom},
#' and \code{other}. Function does not return \code{NA}, but as
#' factor levels "Not answered" and "Don't know", or combined
#' "Not answered/Don't know" when missing values are not
#' declared as punches or labels in initial variables but passed on as
#' \code{NA} (function gives message and where \code{NAs} are lcoated).
#' Default is to have factor with 7 descriptive levels, but function can
#' also return numerical vector. Default behavior returns only present
#' values, but can be made to add additional empty levels if \code{TRUE}
#' is passed to \code{add_missing_levels}. Function also \strong{prints}
#' frequency table of newly recoded religious identification, which can be
#' suppressed with \code{frequencies}. If required, it can also return
#' numerical vector and print the coding for it (not recommended).
#'
#' @examples
#' library(resurrectionr)
#'
#' # When all variables are factor
#' gss14_f$religion <- recode_religion(gss14_f$relig, gss14_f$denom,
#' gss14_f$other, frequencies = FALSE)
#' # Twelve groups
#' gss14_f$religion <- recode_religion(gss14_f$relig, gss14_f$denom,
#' gss14_f$other, n_groups = 12)
#'
#' # When all variables are numeric
#' gss14_n$religion <- recode_religion(gss14_n$relig, gss14_n$denom,
#' gss14_n$other,
#' add_missing_levels = TRUE)
#'
#' # But also, combining them works
#' religion <- recode_religion(gss14_f$relig, gss14_n$denom,
#' as.character(gss14_f$other))
#' @export
recode_religion <- function(relig, denom, other, n_groups = 7,
add_missing_levels = FALSE, frequencies = TRUE,
print_key = FALSE, return_num = FALSE) {
# Check that all arguments are of appropriate type ------------------------
arg_err <- list(error = FALSE, name = c(), type = c())
if (!(is.numeric(relig) | is.factor(relig) | is.character(relig)))
arg_err <- add_error(arg_err, "relig",
"vector (numeric or character) or factor")
if (!(is.numeric(denom) | is.factor(denom) | is.character(denom)))
arg_err <- add_error(arg_err, "denom",
"vector (numeric or character) or factor")
if (!(is.numeric(other) | is.factor(other) | is.character(other)))
arg_err <- add_error(arg_err, "other",
"vector (numeric or character) or factor")
if (!(as.character(n_groups) %in% c("7", "12")))
arg_err <- add_error(arg_err, "n_groups", "7 or 12")
if (!is.logical(add_missing_levels))
arg_err <- add_error(arg_err, "add_missing_levels", "logical")
if (!is.logical(frequencies))
arg_err <- add_error(arg_err, "frequencies", "logical")
if (!is.logical(print_key))
arg_err <- add_error(arg_err, "print_key", "logical")
if (!is.logical(return_num))
arg_err <- add_error(arg_err, "return_num", "logical")
# stop if one or more arguments are mismatched
if (arg_err$error) stop("Arguments are not of appropriate type:\n",
sprintf(" * `%s` must be %s.\n",
arg_err$name, arg_err$type),
call. = FALSE
)
# Check vector lengths ----------------------------------------------------
if ((length(relig) != length(denom)) |
(length(relig) != length(other)) |
(length(denom) != length(other)) )
stop("Vectors must be of the same lenght, currently they are:\n",
"* Length of `relig`: ", length(relig), ".\n",
"* Length of `denom`: ", length(denom), ".\n",
"* Length of `other`: ", length(other), ".\n",
call. = FALSE
)
# Check for NA and transform them -----------------------------------------
merge_na_dk <- FALSE
if (any(is.na(relig)) & any(is.na(denom)) & any(is.na(other))) {
message("Some of the variables contain NA: `Don't know` and `NA`",
"will be merged. Please see documentation for more details.")
merge_na_dk <- TRUE
}
relig <- transform_rdo(relig, "relig")
denom <- transform_rdo(denom, "denom")
other <- transform_rdo(other, "other")
# Check that there are no unallowed values --------------------------------
error_values <- list(error = FALSE, name = c(), type = c())
if (is.logical(relig[[1]])) error_values <- add_error(error_values,
relig[[2]],
relig[[3]])
if (is.logical(denom[[1]])) error_values <- add_error(error_values,
denom[[2]],
denom[[3]])
if (is.logical(other[[1]])) error_values <- add_error(error_values,
other[[2]],
other[[3]])
if (error_values$error) {
stop("Variables with values that are not in codebook:\n",
sprintf(" * `%s` has, for example: %s.\n",
error_values$name, error_values$type),
call. = FALSE)
}
# Create recoded religion -------------------------------------------------
religion <- fct_rec_relig(relig, denom, other)
if (merge_na_dk) {
religion[religion == "Don't know"] <- "Don't know/No answer"
religion[religion == "No answer"] <- "Don't know/No answer"
}
# Reduce groups if needed -------------------------------------------------
all_levels <- c("Sectarian Protestant", "Baptist",
"Moderate Protestant", "Christian, no group given", "Lutheran",
"Liberal Protestant", "Episcopalian",
"Catholic and Orthodox",
"Other religions", "Jew",
"Mormon",
"No identification"
)
if (as.character(n_groups) == "7") {
names(all_levels) <- c(1, 11, 2, 21, 22, 3, 31, 4, 5, 51, 6, 7)
religion[religion %in% all_levels[c("1", "11")]] <- all_levels["1"]
religion[religion %in% all_levels[c("2", "21", "22")]] <- all_levels["2"]
religion[religion %in% all_levels[c("3", "31")]] <- all_levels["3"]
religion[religion %in% all_levels[c("5", "51")]] <- all_levels["5"]
all_levels <- unname(all_levels[as.character(c(1:7))])
}
religion <- as.factor(religion)
# Add missing levels ------------------------------------------------------
if (add_missing_levels) {
missing_levels <- !(all_levels %in% levels(religion))
if (TRUE %in% missing_levels)
levels(religion) <- c(levels(religion), all_levels[missing_levels])
}
# Print frequencies -------------------------------------------------------
if (frequencies) print_frequencies(religion)
# Print recoding key ------------------------------------------------------
if (print_key) {
DF <- data.frame('relig' = relig,
'denom' = denom,
'other' = other,
'religion' = religion
)
print_key(DF)
}
# Change to numeric -------------------------------------------------------
if (return_num) {
indeksi <- 1:length(levels(religion))
message("ATTENTION: Returned is numeric vector without attributes, ",
"and with follwoing coressponding values: ",
sprintf("[%i] %s * ", indeksi, levels(religion)))
religion <- as.numeric(religion)
}
# Return vector -----------------------------------------------------------
return(religion)
}