-
Notifications
You must be signed in to change notification settings - Fork 7
/
mm.R
139 lines (124 loc) · 5.96 KB
/
mm.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
#' @rdname mm
#' @title Marginal Means
#' @description Calculate (descriptive) marginal means (MMs) from a conjoint design
#' @param data A data frame containing variables specified in \code{formula}. All RHS variables should be factors.
#' @param formula A formula specifying an outcome (LHS) and conjoint features (RHS) to describe. All variables should be factors; all levels across features should be unique, with constraints specified with an asterisk (*) between features, as in \code{amce}.
#' @template id
#' @template weights
#' @template feature_order
#' @template feature_labels
#' @template level_order
#' @param h0 A numeric value specifying a null hypothesis value to use when generating z-statistics and p-values.
#' @template alpha
#' @param \dots Ignored.
#' @return A data frame of class \dQuote{cj_mm}
#' @details \code{mm} provides descriptive representations of conjoint data as marginal means (MMs), which represent the mean outcome across all appearances of a particular conjoint feature level, averaging across all other features. In forced choice conjoint designs with two profiles per choice task, MMs by definition average 0.5 with values above 0.5 indicating features that increase profile favorability and values below 0.5 indicating features that decrease profile favorability. For continuous outcomes, MMs can take any value in the full range of the outcome.
#'
#' But note that if feature levels can co-occur, such that both alternatives share a feature level, then the MMs on forced choice outcomes are bounded by the probability of co-occurrence (as a lower bound) and 1 minus that probability as an upper bound.
#'
#' Plotting functionality is provided in \code{\link{plot.cj_mm}}.
#'
#' @examples
#' \donttest{
#' data(immigration)
#' # marginal means
#' mm(immigration, ChosenImmigrant ~ Gender + Education + LanguageSkills,
#' id = ~ CaseID, h0 = 0.5)
#'
#' # higher-order marginal means with feature interactions
#' immigration$language_entry <-
#' interaction(immigration$LanguageSkills, immigration$PriorEntry, sep = "_")
#' mm(immigration, ChosenImmigrant ~ language_entry,
#' id = ~CaseID)
#' }
#' @seealso \code{\link{mm_diffs}} \code{\link{plot.cj_mm}}
#' @import stats
#' @importFrom survey svydesign svyby svymean
#' @export
mm <-
function(
data,
formula,
id = ~ 0,
weights = NULL,
feature_order = NULL,
feature_labels = NULL,
level_order = c("ascending", "descending"),
alpha = 0.05,
h0 = 0,
...
) {
# coerce to "cj_df" to preserve attributes
if (inherits(data, "survey.design")) {
stop("mm() does not currently support passing a 'survey.design' object as 'data'")
# data2 <- cj_df(data[["variables"]])
} else {
data2 <- cj_df(data)
}
# get outcome variable
outcome <- all.vars(stats::update(formula, . ~ 0))
if (!length(outcome) || outcome == ".") {
stop("'formula' is missing a left-hand outcome variable")
}
# get RHS variables, variable labels, and factor levels
RHS <- all.vars(stats::update(formula, 0 ~ . ))
# process feature_order argument
feature_order <- check_feature_order(feature_order, RHS)
# set level_order (within features) to ascending or descending
level_order <- match.arg(level_order)
# function used in cj and ammplot to produce "fancy" feature labels
feature_labels <- clean_feature_labels(data = data2, RHS = RHS, feature_labels = feature_labels)
# convert feature labels and levels to data frame
term_labels_df <- make_term_labels_df(data2, feature_order, level_order = level_order)
# get `weights` as character string
if (!is.null(weights)) {
weightsvar <- all.vars(update(weights, 0 ~ . ))[1L]
data2[["CREGG_WEIGHT"]] <- data2[[weightsvar]]
} else {
weights <- ~ 0
data2[["CREGG_WEIGHT"]] <- 1
}
# get `id` as character string
if (length(all.vars(id))) {
idvar <- all.vars(id)[1L]
} else {
id <- ~ 0
idvar <- NULL
}
# reshape data so we can just do svyglm(OUTCOME ~ 0 + LEVEL) in a loop
long <- stats::reshape(data2[c(outcome, RHS, idvar, "CREGG_WEIGHT")],
varying = list(names(data2[RHS])),
v.names = "LEVEL",
timevar = "Feature",
times = names(data2[RHS]),
idvar = "observation",
direction = "long")
names(long)[names(long) == outcome] <- "OUTCOME"
# calculate MMs, SEs, etc.
out_list <- lapply(unique(long[["Feature"]]), function(this_feature) {
dtmp <- long[long[["Feature"]] == this_feature, , drop = FALSE]
svyd <- survey::svydesign(ids = id, weights = ~ CREGG_WEIGHT, data = dtmp)
mod <- survey::svyglm(OUTCOME ~ 0 + LEVEL, design = svyd)
cs <- get_coef_summary(mod = mod, data = data, id = NULL, alpha = alpha)
names(cs) <- c("estimate", "std.error", "z", "p", "lower", "upper", "level")
## correct z test to respect h0
cs[["z"]] <- (cs[["estimate"]] - h0)/cs[["std.error"]]
cs[["p"]] <- 2L * stats::pnorm(-abs(cs[["z"]]))
cs[["level"]] <- factor(sub("^LEVEL", "", cs[["level"]]))
cs
})
out <- do.call("rbind", out_list)
# attach feature labels
out <- merge(out, make_term_labels_df(data2, RHS), by = c("level"), all = TRUE)
out[["level"]] <- factor(out[["level"]], levels = term_labels_df[["level"]])
out[["feature"]] <- factor(out[["feature"]],
levels = feature_order,
labels = feature_labels[feature_order])
out[["outcome"]] <- outcome
# return organized data frame
out[["statistic"]] <- "mm"
out <- out[c("outcome", "statistic", "feature", "level", "estimate", "std.error", "z", "p", "lower", "upper")]
out <- out[order(out[["level"]]),]
rownames(out) <- seq_len(nrow(out))
return(structure(out, class = c("cj_mm", "data.frame")))
}