-
Notifications
You must be signed in to change notification settings - Fork 0
/
fit_measures_change_approx.R
340 lines (329 loc) · 12.4 KB
/
fit_measures_change_approx.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
#' @title Case Influence on Fit Measures (Approximate)
#'
#' @description Gets a [lavaan::lavaan()] output and computes the
#' approximate change
#' in selected fit measures if a case is included.
#'
#' @details For each case, [fit_measures_change_approx()] computes the
#' approximate differences in selected fit measures with and
#' without this case:
#'
#' (Fit measure with all case) - (Fit measure without this case).
#'
#' If the value of a case is positive, including the case increases an estimate.
#'
#' If the value of a case is negative, including the case decreases an estimate.
#'
#' Note that an increase is an improvement in fit for
#' goodness of fit measures such as CFI and TLI, but a decrease
#' is an improvement in fit for badness of fit measures such as
#' RMSEA and model chi-square.
#' This is a measure of the influence of a case on a fit measure
#' if it is included.
#'
#' The model is not refitted. Therefore, the result is only an
#' approximation of that of [fit_measures_change()]. However, this
#' approximation is useful for identifying potentially influential
#' cases when the sample size is very large or the model takes a long
#' time to fit. This function can be used to identify potentially
#' influential cases quickly and then select them to conduct the
#' leave-one-out sensitivity analysis using [lavaan_rerun()] and
#' [fit_measures_change()].
#'
#' For the technical details, please refer to the vignette
#' on this approach: \code{vignette("casewise_scores", package = "semfindr")}
#'
#' Supports both single-group and multiple-group models.
#' (Support for multiple-group models available in 0.1.4.8 and later version).
#'
#' @param fit The output from [lavaan::lavaan()] or its wrappers (e.g.,
#' [lavaan::cfa()] and [lavaan::sem()]).
#'
#' @param fit_measures The argument `fit.measures` used in
#' [lavaan::fitMeasures]. Default is
#' `c("chisq", "cfi", "rmsea", "tli")`.
#' Currently, the approximate method supports only
#' these four measures.
#'
#' @param baseline_model The argument `baseline.model` used in
#' [lavaan::fitMeasures]. Default is `NULL`.
#'
#' @param case_id If it is a character vector of length equals to the
#' number of cases (the number of rows in the data in `fit`), then it
#' is the vector of case identification values. If it is `NULL`, the
#' default, then `case.idx` used by `lavaan` functions will be used
#' as case identification values.
#'
#' @param allow_inadmissible If `TRUE`, accepts a fit object with
#' inadmissible results (i.e., `post.check` from
#' [lavaan::lavInspect()] is `FALSE`). Default is `FALSE`.
#'
#' @param skip_all_checks If `TRUE`, skips all checks and allows
#' users to run this function on any object of `lavaan` class.
#' For users to experiment this and other functions on models
#' not officially supported. Default is `FALSE`.
#'
#' @return An `fit_measures_change`-class object, which is
#' matrix with the number of columns equals to the number of
#' requested fit measures, and the number of rows equals to the number
#' of cases. The row names are case identification values.
#' A print method is available for user-friendly output.
#'
#' @author Idea by Mark Hok Chio Lai <https://orcid.org/0000-0002-9196-7406>,
#' implemented by Shu Fai Cheung <https://orcid.org/0000-0002-9871-9448>.
#'
#'
#' @examples
#' library(lavaan)
#'
#' # A path model
#'
#' dat <- pa_dat
#' mod <-
#' "
#' m1 ~ a1 * iv1 + a2 * iv2
#' dv ~ b * m1
#' a1b := a1 * b
#' a2b := a2 * b
#' "
#' # Fit the model
#' fit <- lavaan::sem(mod, dat)
#' summary(fit)
#'
#' # Approximate changes
#' out_approx <- fit_measures_change_approx(fit, fit_measures = "chisq")
#' head(out_approx)
#' # Fit the model several times. Each time with one case removed.
#' # For illustration, do this only for four selected cases
#' fit_rerun <- lavaan_rerun(fit, parallel = FALSE,
#' to_rerun = 1:5)
#' # Compute the changes in chisq if a case is included
#' # vs. if this case is excluded.
#' # That is, case influence on model chi-squared.
#' out <- fit_measures_change(fit_rerun, fit_measures = "chisq")
#' # Case influence, for the first few cases
#' head(out)
#' # Compare the results
#' plot(out_approx[1:5, "chisq"], out)
#' abline(a = 0, b = 1)
#'
#' # A CFA model
#'
#' dat <- cfa_dat
#' mod <-
#' "
#' f1 =~ x1 + x2 + x3
#' f2 =~ x4 + x5 + x6
#' f1 ~~ f2
#' "
#' # Fit the model
#' fit <- lavaan::cfa(mod, dat)
#'
#' out_approx <- fit_measures_change_approx(fit, fit_measures = "chisq")
#' head(out_approx)
#'
#' fit_rerun <- lavaan_rerun(fit, parallel = FALSE,
#' to_rerun = 1:5)
#' # Compute the changes in chisq if a case is included
#' # vs. if this case is excluded.
#' # That is, case influence on fit measures.
#' out <- fit_measures_change(fit_rerun, fit_measures = "chisq")
#' # Results excluding a case, for the first few cases
#' head(out)
#' # Compare the results
#' plot(out_approx[1:5, "chisq"], out)
#' abline(a = 0, b = 1)
#'
#' # A latent variable model
#'
#' dat <- sem_dat
#' mod <-
#' "
#' f1 =~ x1 + x2 + x3
#' f2 =~ x4 + x5 + x6
#' f3 =~ x7 + x8 + x9
#' f2 ~ a * f1
#' f3 ~ b * f2
#' ab := a * b
#' "
#' # Fit the model
#' fit <- lavaan::sem(mod, dat)
#'
#' out_approx <- fit_measures_change_approx(fit, fit_measures = "chisq")
#' head(out_approx)
#'
#' fit_rerun <- lavaan_rerun(fit, parallel = FALSE,
#' to_rerun = 1:5)
#' # Compute the changes in chisq if a case is excluded
#' # vs. if this case is included.
#' # That is, case influence on model chi-squared.
#' out <- fit_measures_change(fit_rerun, fit_measures = "chisq")
#' # Case influence, for the first few cases
#' head(out)
#' # Compare the results
#' plot(out_approx[1:5, "chisq"], out)
#' abline(a = 0, b = 1)
#'
#' @importFrom methods .hasSlot
#' @export
fit_measures_change_approx <- function(fit,
fit_measures = c("chisq",
"cfi",
"rmsea",
"tli"),
baseline_model = NULL,
case_id = NULL,
allow_inadmissible = FALSE,
skip_all_checks = FALSE
) {
fit_measures <- match.arg(fit_measures, several.ok = TRUE)
if (length(fit_measures) == 0) stop("No measure is requested.")
if (missing(fit)) {
stop("No lavaan output supplied.")
}
if (!inherits(fit, "lavaan")) {
stop("The fit object is not a lavaan output.")
}
if (!skip_all_checks) {
check_out <- approx_check(fit, print_messages = FALSE,
multiple_group = TRUE,
equality_constraint = TRUE)
if (check_out != 0) {
if ((check_out == -1) &&
!(suppressWarnings(lavaan::lavInspect(fit, "post.check"))) &&
allow_inadmissible) {
} else {
stop(attr(check_out, "info"))
}
}
}
ngroups <- lavaan::lavTech(fit, "ngroups")
if (ngroups > 1) {
n_j <- sapply(lavaan::lavInspect(fit, "data"), nrow)
n <- sum(n_j)
} else {
n <- nrow(lavaan::lavInspect(fit, "data"))
n_j <- n
}
case_ids <- lavaan::lavInspect(fit, "case.idx",
drop.list.single.group = FALSE)
case_ids <- unlist(case_ids, use.names = FALSE)
case_ids_order <- order(case_ids)
case_ids <- sort(case_ids)
if (!is.null(case_id)) {
if (length(case_id) != n) {
stop("The length of case_id is not equal to the number of cases.")
} else {
case_ids <- case_id
}
}
out <- matrix(NA, nrow = n, ncol = length(fit_measures))
colnames(out) <- fit_measures
rownames(out) <- case_ids
mod_h1 <- lavaan::lav_partable_unrestricted(fit)
opt_h1 <- fit@Options
opt_h1$se <- "none"
fit_h1 <- lavaan::lavaan(model = mod_h1,
slotOptions = opt_h1,
slotSampleStats = fit@SampleStats,
slotData = fit@Data)
lli_h1 <- lavaan::lavInspect(fit_h1, what = "loglik.casewise",
drop.list.single.group = FALSE)
lli <- lavaan::lavInspect(fit, what = "loglik.casewise",
drop.list.single.group = FALSE)
lli_h1 <- unlist(lli_h1, use.names = FALSE)[case_ids_order]
lli <- unlist(lli, use.names = FALSE)[case_ids_order]
chisq_change_fit <- 2 * (lli_h1 - lli)
if ("chisq" %in% fit_measures) {
out[, "chisq"] <- chisq_change_fit
}
tmp <- lavaan::fitMeasures(fit, c("chisq", "df"))
chisq_fit <- unname(tmp["chisq"])
df_fit <- unname(tmp["df"])
chisq_fit_approx <- chisq_fit - chisq_change_fit
# Adapted from lavaan:::lav_fit_measures
if (!is.null(baseline_model)) {
} else if (.hasSlot(fit, "baseline") &&
length(fit@baseline) > 0L) {
mod_base <- lavaan::lavInspect(fit,
what = "baseline.partable")
opt_baseline <- fit@Options
opt_baseline$se <- "none"
baseline_model <- lavaan::lavaan(model = mod_base,
slotOptions = opt_baseline,
slotSampleStats = fit@SampleStats,
slotData = fit@Data)
} else {
mod_base <- lavaan::lav_partable_independence(fit)
opt_baseline <- fit@Options
opt_baseline$se <- "none"
baseline_model <- lavaan::lavaan(model = mod_base,
slotOptions = opt_baseline,
slotSampleStats = fit@SampleStats,
slotData = fit@Data)
}
lli_base <- lavaan::lavInspect(baseline_model,
what = "loglik.casewise",
drop.list.single.group = FALSE)
lli_base <- unlist(lli_base, use.names = FALSE)[case_ids_order]
tmp <- lavaan::fitMeasures(baseline_model, c("chisq", "df"))
chisq_base <- unname(tmp["chisq"])
df_base <- unname(tmp["df"])
chisq_change_base <- 2 * (lli_h1 - lli_base)
chisq_base_approx <- chisq_base - chisq_change_base
if ("cfi" %in% fit_measures) {
cfi_fit <- lavaan::fitMeasures(fit, "cfi",
baseline.model = baseline_model)
cfi_approx <- mapply(cfi, chisq_fit = chisq_fit_approx,
chisq_baseline = chisq_base_approx,
MoreArgs = list(df_fit = df_fit,
df_baseline = df_base))
cfi_change <- cfi_fit - cfi_approx
out[, "cfi"] <- cfi_change
}
if ("tli" %in% fit_measures) {
tli_fit <- lavaan::fitMeasures(fit, "tli",
baseline.model = baseline_model)
tli_approx <- mapply(tli, chisq_fit = chisq_fit_approx,
chisq_baseline = chisq_base_approx,
MoreArgs = list(df_fit = df_fit,
df_baseline = df_base))
tli_change <- tli_fit - tli_approx
out[, "tli"] <- tli_change
}
if ("rmsea" %in% fit_measures) {
rmsea_fit <- lavaan::fitMeasures(fit, "rmsea")
# CHECK: The "n" of RMSEA in multiple group models.
tmp <- (chisq_fit_approx / (n - 1)) / df_fit - 1 / (n - 1)
rmsea_approx <- sqrt(pmax(tmp, 0, na.rm = TRUE))
rmsea_change <- rmsea_fit - rmsea_approx
out[, "rmsea"] <- rmsea_change
}
attr(out, "call") <- match.call()
attr(out, "method") <- "approx"
class(out) <- c("fit_measures_change", class(out))
out
}
#' @noRd
cfi <- function(chisq_fit, df_fit, chisq_baseline, df_baseline) {
# Adapted from lavaan:::lav_fit_measures
t1 <- max(c(chisq_fit - df_fit, 0))
t2 <- max(c(chisq_fit - df_fit, chisq_baseline - df_baseline, 0))
if (isTRUE(all.equal(t1, 0)) &&
isTRUE(all.equal(t2, 0))) {
return(1)
} else {
return(1 - t1 / t2)
}
}
#' @noRd
tli <- function(chisq_fit, df_fit, chisq_baseline, df_baseline) {
# Adapted from lavaan:::lav_fit_measures
t1 <- (chisq_fit - df_fit) * df_baseline
t2 <- (chisq_baseline - df_baseline) * df_fit
if (df_fit > 0 && abs(t2) > 0) {
return(1 - t1 / t2)
} else {
return(1)
}
}