-
Notifications
You must be signed in to change notification settings - Fork 0
/
summary_stats_tables.Rmd
441 lines (365 loc) · 16.2 KB
/
summary_stats_tables.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
---
title: "Convergence, Admissibility, and Fit Descriptive Statistics"
date: "2019-05-07"
output: workflowr::wflow_html
editor_options:
chunk_output_type: console
---
<!-- %
%% ============================================= #
%% Padgettt
%% ============================================= #
%% Data Created: 2019-02-20
%% Date Modified: 2019-02-26
%% By: R. Noah Padgett
%% ============================================= #
%% MA Thesis Project
%% ============================================= #
%% Purpose:
%% This R script is for
%% 1. creating simple descriptive statistics
%% across conditions
%% 2. Generating tables of those stats
%% 3. Create simple boxplots for trends
%% 4. Creating publication boxplots
%%
%% The output is numerous .eps files (figures)
%% and code for tables in latex
%% ============================================= # -->
Purpose of this file:
1. Summarize convergence
2. Summarize proper solution rates
3. Creating simple descriptive statistics across conditions
4. Generating tables of those stats
The output is mostly just a lot of latex ready tables. Not all of these tables are included in the final publication, but we wanted to be as precise as possible with respect to the summary of the fit statistics.
# Packages and Set-Up
```{r set-up, tidy=T}
##Chunk iptions
knitr::opts_chunk$set(out.width = "225%")
#setwd('C:/Users/noahp/Dropbox/MCFA Thesis/Code Results')
## Packages
## General Packages
library(tidyverse)
# Formatting and Tables
library(kableExtra)
library(xtable)
# For plotting
library(ggplot2)
theme_set(theme_bw())
# Data manipulating
library(dplyr)
```
# Data Management
```{r data, tidy=T}
sim_results <- as_tibble(read.table('data/compiled_fit_results.txt', header=T,sep='\t'))
## Next, turn condition into a factor for plotting
sim_results$Condition <- as.factor(sim_results$Condition)
## Next, since TLI is non-normed, any value greater than 1 needs to be rescaled to 1.
sim_results$TLI <- ifelse(sim_results$TLI > 1, 1, sim_results$TLI)
sim_results$TLI <- ifelse(sim_results$TLI < 0, 0, sim_results$TLI)
## Next, summarize the results of the chi-square test of model fit. This is done simply by comparing the p-value to alpha (0.05) and indicating whether the model was flagged as fitting or not.
# Note: if p < 0.05 then this variable is flagged as 0, and 1 otherwise
sim_results$Chi2_pvalue_decision <- ifelse(sim_results$chisqu_pvalue < 0.05, 0, 1)
# 0 = rejected that these data fit this model
# 1 = failed to reject that these data fit this model
```
## Adding Labels to Conditions
Currently, each condition is kind of like a hidden id that we don't know what the actual factor is.
So, first thing isto create meaningful labels for us to use.
Remember, the 72 conditions for the this study were
1. Level-1 sample size (ss\_l1 or n1) (5, 10, 30)
2. Level-2 sample size (ss\_l2 or n2) (30, 50, 100, 200)
3. Observed indicator ICC (.1, .3, .5)
4. Latent variable ICC (.1, .5)
```{r}
## level-1 Sample size
ss_l1 <- c(5, 10, 30) ## 6 conditions each
ss_l2 <- c(30, 50, 100, 200) ## 18 condition each
icc_ov <- c(.1, .3, .5) ## 2 conditions each
icc_lv <- c(.1, .5) ## every other condition
nCon <- 72 # number of conditions
nRep <- 500 # number of replications per condition
nMod <- 12 ## numberof estimated models per conditions
## Total number of rows: 432,000
ss_l2 <- c(rep(ss_l2[1], 18*nRep*nMod), rep(ss_l2[2], 18*nRep*nMod),
rep(ss_l2[3], 18*nRep*nMod), rep(ss_l2[4], 18*nRep*nMod))
ss_l1 <- rep(c(rep(ss_l1[1],6*nRep*nMod), rep(ss_l1[2],6*nRep*nMod), rep(ss_l1[3],6*nRep*nMod)), 4)
icc_ov <- rep(c(rep(icc_ov[1], 2*nRep*nMod), rep(icc_ov[2], 2*nRep*nMod), rep(icc_ov[3], 2*nRep*nMod)), 12)
icc_lv <- rep(c(rep(icc_lv[1], nRep*nMod), rep(icc_lv[2], nRep*nMod)), 36)
## Force these vectors to be column vectors
ss_l1 <- matrix(ss_l1, ncol=1)
ss_l2 <- matrix(ss_l2, ncol=1)
icc_ov <- matrix(icc_ov, ncol=1)
icc_lv <- matrix(icc_lv, ncol=1)
## Add the labels to the results data frame
sim_results <- sim_results[order(sim_results$Condition),]
sim_results <- cbind(sim_results, ss_l1, ss_l2, icc_ov, icc_lv)
## Force the conditions to be factors
sim_results$ss_l1 <- as.factor(sim_results$ss_l1)
sim_results$ss_l2 <- as.factor(sim_results$ss_l2)
sim_results$icc_ov <- as.factor(sim_results$icc_ov)
sim_results$icc_lv <- as.factor(sim_results$icc_lv)
sim_results$Model <- factor(sim_results$Model, levels = c('C','M1','M2','M12'), ordered = T)
## Set up iterators for remainder of script
mods <- c('C', 'M1', 'M2', 'M12')
ests <- c('MLR', 'ULSMV', 'WLSMV')
```
For the descriptive statistics, I will use dplyr.
From here I can easily create matrices that store the results so that I can easily print out the results for summarizing the results.
Each will be printed out as a html table and a xtable (latex ready) table.
# Convergence Summary
Convergence will be broken out by Model (C, M1, M2, M12) and estimator (MLR, WLSMV, ULSMV).
So, there will 12 smallish tables piecemail tables.
Next, one very large table of all the conditions will be exported in latex ready format.
## Convergence by Model and Estimator
```{r}
## first table summary table
c <- sim_results %>%
group_by(Model, Estimator) %>%
summarise(Converge=mean(Converge))
# Next make the columns the estimator factor
c <- cbind(c[ c$Model == 'C', 'Converge'],
c[ c$Model == 'M1', 'Converge'],
c[ c$Model == 'M2', 'Converge'],
c[ c$Model == 'M12', 'Converge'])
rownames(c) <- c('MLR', 'ULSMV', 'WLSMV')
colnames(c) <- c('C' ,'M1' ,'M2', 'M12')
## Print results in a nice looking table in HTML
kable(c, format='html') %>%
kable_styling(full_width = T)%>%
add_header_above(c(' '= 1, 'Model Specification'=4))
## Print out in tex
print(xtable(c, digits = 3), booktabs = T, include.rownames = T)
```
## Model, Estimator, and Sample Sizes
```{r}
## first table summary table
c <- sim_results %>%
group_by(Model, Estimator, ss_l1, ss_l2) %>%
summarise(Converge=mean(Converge))
# Next make the columns the estimator factor
c1 <- cbind(c[ c$Model == 'C', c('Estimator', 'ss_l1', 'ss_l2', 'Converge')],
c[ c$Model == 'M1', 'Converge'],
c[ c$Model == 'M2', 'Converge'],
c[ c$Model == 'M12', 'Converge'])
colnames(c1) <- c('Estimator', 'SS Level-1', 'SS Level-2', 'C' ,'M1' ,'M2', 'M12')
## Print results in a nice looking table in HTML
kable(c1, format='html') %>%
kable_styling(full_width = T) %>%
add_header_above(c(' '= 3, 'Model Specification'=4))
## Print out in tex
print(xtable(c1, digits = 3), booktabs = T, include.rownames = F)
```
## Across all conditions
```{r}
## first table summary table
c <- sim_results %>%
group_by(Model, Estimator, ss_l1, ss_l2, icc_ov, icc_lv) %>%
summarise(Converge=mean(Converge))
# Next make the columns the estimator factor
c1 <- cbind(c[ c$Model == 'C' & c$Estimator == 'MLR', c('ss_l1', 'ss_l2','icc_ov', 'icc_lv', 'Converge')],
c[ c$Model == 'C' & c$Estimator == 'ULSMV', 'Converge'],
c[ c$Model == 'C' & c$Estimator == 'WLSMV', 'Converge'],
c[ c$Model == 'M1' & c$Estimator == 'MLR', 'Converge'],
c[ c$Model == 'M1' & c$Estimator == 'ULSMV', 'Converge'],
c[ c$Model == 'M1' & c$Estimator == 'WLSMV', 'Converge'],
c[ c$Model == 'M2' & c$Estimator == 'MLR', 'Converge'],
c[ c$Model == 'M2' & c$Estimator == 'ULSMV', 'Converge'],
c[ c$Model == 'M2' & c$Estimator == 'WLSMV', 'Converge'],
c[ c$Model == 'M12' & c$Estimator == 'MLR', 'Converge'],
c[ c$Model == 'M12' & c$Estimator == 'ULSMV', 'Converge'],
c[ c$Model == 'M12' & c$Estimator == 'WLSMV', 'Converge'])
colnames(c1) <- c('SS Level-1', 'SS Level-2', 'ICC-OV', 'ICC-LV', rep(c('MLR', 'ULSMV', 'WLSMV'), 4))
## Print results in a nice looking table in HTML
kable(c1, format='html') %>%
kable_styling(full_width = T) %>%
add_header_above(c(' '= 4, 'Model C'=3, 'Model M1'=3, 'Model M2'=3, 'Model M12'=3))
## Print out in tex
print(xtable(c1, digits = 3), booktabs = T, include.rownames = F)
```
# Admissibility Summary
Admissibility rates are first subsetted to the converged models.
So, the rates may seem misleading and not directly relatable across all conditions and models due to differences in convergence rates.
```{r}
c.sim_results <- filter(sim_results, Converge == 1)
```
## Admissibilty by Model and Estimator
```{r}
## first table summary table
c <- c.sim_results %>%
group_by(Model, Estimator) %>%
summarise(Admissible=mean(Admissible))
# Next make the columns the estimator factor
c1 <- cbind(c[ c$Model == 'C', 'Admissible'],
c[ c$Model == 'M1', 'Admissible'],
c[ c$Model == 'M2', 'Admissible'],
c[ c$Model == 'M12', 'Admissible'])
rownames(c1) <- c('MLR', 'ULSMV', 'WLSMV')
colnames(c1) <- c('C' ,'M1' ,'M2', 'M12')
## Print results in a nice looking table in HTML
kable(c1, format='html') %>%
kable_styling(full_width = T) %>%
add_header_above(c(' '= 1, 'Model Specification'=4))
## Print out in tex
print(xtable(c1, digits = 3), booktabs = T, include.rownames = T)
```
## Model, Estimator, and Sample Sizes
```{r}
## first table summary table
c <- c.sim_results %>%
group_by(Model, Estimator, ss_l1, ss_l2) %>%
summarise(Admissible=mean(Admissible))
# Next make the columns the estimator factor
c1 <- cbind(c[ c$Model == 'C', c('Estimator', 'ss_l1', 'ss_l2', 'Admissible')],
c[ c$Model == 'M1', 'Admissible'],
c[ c$Model == 'M2', 'Admissible'],
c[ c$Model == 'M12', 'Admissible'])
colnames(c1) <- c('Estimator', 'SS Level-1', 'SS Level-2', 'C' ,'M1' ,'M2', 'M12')
## Print results in a nice looking table in HTML
kable(c1, format='html') %>%
kable_styling(full_width = T) %>%
add_header_above(c(' '= 3, 'Model Specification'=4))
## Print out in tex
print(xtable(c1, digits = 3), booktabs = T, include.rownames = F)
```
## Across all conditions
```{r}
## first table summary table
c <- c.sim_results %>%
group_by(Model, Estimator, ss_l1, ss_l2, icc_ov, icc_lv) %>%
summarise(Admissible=mean(Admissible))
# Next make the columns the estimator factor
c1 <- cbind(c[ c$Model == 'C' & c$Estimator == 'MLR', c('ss_l1', 'ss_l2','icc_ov', 'icc_lv', 'Admissible')],
c[ c$Model == 'C' & c$Estimator == 'ULSMV', 'Admissible'],
c[ c$Model == 'C' & c$Estimator == 'WLSMV', 'Admissible'],
c[ c$Model == 'M1' & c$Estimator == 'MLR', 'Admissible'],
c[ c$Model == 'M1' & c$Estimator == 'ULSMV', 'Admissible'],
c[ c$Model == 'M1' & c$Estimator == 'WLSMV', 'Admissible'],
c[ c$Model == 'M2' & c$Estimator == 'MLR', 'Admissible'],
c[ c$Model == 'M2' & c$Estimator == 'ULSMV', 'Admissible'],
c[ c$Model == 'M2' & c$Estimator == 'WLSMV', 'Admissible'],
c[ c$Model == 'M12' & c$Estimator == 'MLR', 'Admissible'],
c[ c$Model == 'M12' & c$Estimator == 'ULSMV', 'Admissible'],
c[ c$Model == 'M12' & c$Estimator == 'WLSMV', 'Admissible'])
colnames(c1) <- c('SS Level-1', 'SS Level-2', 'ICC-OV', 'ICC-LV', rep(c('MLR', 'ULSMV', 'WLSMV'), 4))
## Print results in a nice looking table in HTML
kable(c1, format='html') %>%
kable_styling(full_width = T) %>%
add_header_above(c(' '= 4, 'Model C'=3, 'Model M1'=3, 'Model M2'=3, 'Model M12'=3))
## Print out in tex
print(xtable(c1, digits = 3), booktabs = T, include.rownames = F)
```
# Descriptive Statistics of Fit Indices
Now for the long process of making tables for the MANY conditions for the descriptive statistics.
For this, we need to do this is steps so that all the information gets outputted in the correct manor for table.
For each statistic under each condition, model, and estimator, the code below create a table that contains the average value and the standard deviation.
Again, just like the descriptives above, a summary table was made to start.
These tables are made based only on the 1) converged models and 2) the admissible solutions.
The fit indices included are:
1. Chi-square p-value summary, i.e. proportion of tims the p-value was greater than 0.05. No SD is reported here.
2. CFI with mean and sd
3. TLI with mean and sd
4. RMSEA with mean and sd
5. SRMRW with mean and sd
6. SRMRB with mean and sd
```{r fit-sum}
mydata <- filter(sim_results, Converge == 1 & Admissible == 1)
## first table summary table
a <- mydata %>%
group_by(Model, Estimator) %>%
summarise(
chi2=mean(Chi2_pvalue_decision, na.rm = T),
CFI.m =mean(CFI, na.rm = T), CFI.sd =sd(CFI, na.rm = T),
TLI.m =mean(TLI, na.rm = T), TLI.sd =sd(TLI, na.rm = T),
RMSEA.m =mean(RMSEA, na.rm = T), RMSEA.sd =sd(RMSEA, na.rm = T),
SRMRW.m =mean(SRMRW, na.rm = T), SRMRW.sd =sd(SRMRW, na.rm = T),
SRMRB.m =mean(SRMRB, na.rm = T), SRMRB.sd =sd(SRMRB, na.rm = T)
)
## Print results in a nice looking table in HTML
kable(a, format='html') %>%
kable_styling(full_width = T)
## make a copy of a to print into
a1 <- as_tibble(as.data.frame(matrix(NA, ncol=8,nrow=nrow(a))))
colnames(a1) <- c('Model', 'Estimator', "chi2", "CFI",'TLI', 'RMSEA', 'SRMRW', 'SRMRB')
i <- 1
for(i in 1:nrow(a)){
a1[i,3:8] <- unlist(c(
round(a[i,3],3),
paste0(round(a[i,4],3), ' (', round(a[i,5],2), ')'),
paste0(round(a[i,6],3), ' (', round(a[i,7],2), ')'),
paste0(round(a[i,8],3), ' (', round(a[i,9],2), ')'),
paste0(round(a[i,10],3), ' (', round(a[i,11],2), ')'),
paste0(round(a[i,12],3), ' (', round(a[i,12],2), ')')
))
}
a1[,1:2] <- a[,1:2]## add factors back
## Print out in tex
print(xtable(a1, digits = 3), booktabs = T, include.rownames = F)
```
## TONS of additional tables of the fit statistics across conditions
```{r}
## Now, create MANY subset tables to breakdown these relationships
## loop around these iterators
for(M in mods){
for(E in ests){
### subset tothe model (M) and estimator (E)
#M <- 'C'
#E <- 'MLR'
cat('\n\n ===============================\n')
cat('\nModel:\t', M)
cat('\nEstimator:\t', E, '\n')
sub_dat <- mydata[ mydata$Model == M & mydata$Estimator == E,]
a <- sub_dat %>%
group_by(ss_l1, ss_l2, icc_ov, icc_lv) %>%
summarise(
N = n(),
chi2=mean(Chi2_pvalue_decision, na.rm = T),
CFI.m =mean(CFI, na.rm = T), CFI.sd =sd(CFI, na.rm = T),
TLI.m =mean(TLI, na.rm = T), TLI.sd =sd(TLI, na.rm = T),
RMSEA.m =mean(RMSEA, na.rm = T), RMSEA.sd =sd(RMSEA, na.rm = T),
SRMRW.m =mean(SRMRW, na.rm = T), SRMRW.sd =sd(SRMRW, na.rm = T),
SRMRB.m =mean(SRMRB, na.rm = T), SRMRB.sd =sd(SRMRB, na.rm = T)
)
#print(xtable(a, digits = 3), booktabs = T, include.rownames = F)
## Now, create subsets of this results matrix for outputting into small(ish) tables
## Subset by ICC conditions
ICCO <- unique(a$icc_ov)
ICCL <- unique(a$icc_lv)
icco <- ICCO[1]
iccl <- ICCL[1]
for(icco in ICCO){
for(iccl in ICCL){
### subset tothe model (M) and estimator (E)
#M <- 'C'
#E <- 'MLR'
cat('\n===============================\n')
cat('\nModel:\t', M)
cat('\nEstimator:\t', E)
cat('\nICC Obs. Var.:\t', icco)
cat('\nICC Lat. Var.:\t', iccl,'\n')
a_s <- filter(a, icc_ov == icco, icc_lv == iccl)
## make a copy of a to print into
a1 <- as_tibble(as.data.frame(matrix(NA, ncol=9,nrow=nrow(a_s))))
colnames(a1) <- c('N2', 'N1', 'Num_Rep', "chi2", "CFI",'TLI', 'RMSEA', 'SRMRW', 'SRMRB')
i <- 1
for(i in 1:nrow(a_s)){
a1[i,4:9] <- unlist(c(
round(a_s[i,6],2),
paste0(round(a_s[i,7],2), '(', round(a_s[i,8],2), ')'),
paste0(round(a_s[i,9],2), '(', round(a_s[i,10],2), ')'),
paste0(round(a_s[i,11],2), '(', round(a_s[i,12],2), ')'),
paste0(round(a_s[i,13],2), '(', round(a_s[i,14],2), ')'),
paste0(round(a_s[i,15],2), '(', round(a_s[i,16],2), ')')
))
}
a1[,1:2] <- a_s[,c(2,1)]## add factors back with diff. order
## Print out in tex
print(xtable(a1,
caption = paste0('Summary of Fit Statistics Across Conditions: Model ',
M,', Estimator ',E,', ICC_O ',icco,' and ICC_L ', iccl)),
booktabs = T, include.rownames = F)
}
}## End subset table printing
}
} ## End loops..
```