-
Notifications
You must be signed in to change notification settings - Fork 0
/
scree_plot.R
138 lines (116 loc) · 4.46 KB
/
scree_plot.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
#' Scree test
#'
#' \code{scree_plot} generates a scree plot with superimpose parallel analysis
#'
#' @param data a data frame or correlation matrix.
#' @param n.iter number of iterations for parallel analysis.
#' @param method factoring method (\code{pc}, \code{pa}, or \code{ml}).
#' @param ... parameters passed to \code{psych::fa.parallel}.
#'
#' @details
#' The \code{scree_plot} function is a wrapper for the \code{psych::parallel} function.
#' A ggplot2 graph is produced with the scree plot and parallel analysis (scree plot for
#' simulated data) based on the method chosen. The methods are principal components ("pc"),
#' principal axis ("pa"), and maximum likelihood ("ml") factoring.
#'
#' @importFrom psych fa.parallel
#' @import ggplot2
#'
#' @export
#'
#' @return a ggplot2 graph
#' @examples
#' scree_plot(Harman74.cor$cov, method="pc", n.obs=145)
scree_plot <- function(data, n.iter = 50, method = c("pc", "pa", "ml"), ...) {
# number of variables
nvar <- ncol(data)
# suppress output
pdf(file=NULL)
sink(file=tempfile())
method <- match.arg(method)
if (method == "pc") {
x <- psych::fa.parallel(x = data, n.iter = n.iter,
fa = "pc", ...)
caption <- "Principal Components Analysis"
}
if (method == "pa") {
x <- psych::fa.parallel(x = data, n.iter = n.iter,
fa = "fa", fm ="pa", ...)
caption <- "Principal Axis Factoring"
}
if (method == "ml") {
x <- psych::fa.parallel(x = data, n.iter = n.iter,
fa = "fa", fm ="ml", ...)
caption <- "Maximum Likelihood Factoring"
}
# unsuppress output
sink()
dev.off()
if (method == "pc") {
xlab <- "Component Number"
yintercept <- 1
actual <- x$pc.values
sim <- x$pc.sim
nfact <- x$ncomp
cat("\nNote: parallel analysis suggests", nfact, "components.\n")
} else {
xlab <- "Factor Number"
yintercept <- 0
actual <- x$fa.values
sim <- x$fa.sim
nfact <- x$nfact
cat("\nNote: parallel analysis suggests", nfact, "factors.\n")
}
# arrange plot data
actual <- data.frame(n = 1:nvar, eigenvalue <- actual, type="actual")
sim <- data.frame(n = 1:nvar, eigenvalue <- sim, type="simulated")
names(actual) <- names(sim) <- c("n", "eigenvalue", "type")
plotdata <- as.data.frame(rbind(actual, sim))
# plot results
apatheme=theme_bw()+
theme(panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
panel.background = element_blank(),
panel.border = element_blank(),
#text=element_text(family='Arial'),
legend.title=element_blank(),
legend.position=c(.7,.8),
axis.line.x = element_line(color='black'),
axis.line.y = element_line(color='black'))
# Use data from plotdata. Map number of factors to x-axis, eigenvalue to y-axis,
# and give different data point shapes depending on whether eigenvalue is
# observed or simulated
p = ggplot(plotdata, aes(x=.data[["n"]],
y=.data[["eigenvalue"]],
shape=.data[["type"]],
color=.data[["type"]])) +
# Add lines connecting data points
geom_line() +
# Add the data points.
geom_point(size=4) +
# Label axes and titles
labs(y = "Eigenvalue",
x = xlab,
title = "Scree Plot",
subtitle = paste0("with parallel analysis (", n.iter, " iterations)"),
caption = caption) +
# Ensure x-axis ranges from 1 to max # of factors,
# increasing by one with each 'tick' mark.
scale_x_continuous(breaks=min(plotdata$n):max(plotdata$n)) +
# Ensure y-axis that it ranges from below the lowest eigenvalue
# to above the highest eigenvalue an increments by .25.
scale_y_continuous(breaks=round(seq(from = floor(min(plotdata$eigenvalue)),
to = ceiling(max(plotdata$eigenvalue)),
by=.5), 2)) +
# Manually specify the different shapes to use for actual and simulated data,
# in this case, white and black circles.
scale_shape_manual(values=c(16, 1)) +
# Add horizontal line indicating Kaiser Harris criteria
geom_hline(yintercept = yintercept, linetype = "dashed", color="lightgray") +
# Add vertical line indicating parallel analysis suggested max # of factors to retain
geom_vline(xintercept = nfact, linetype = 'dashed') +
# Apply our apa-formatting theme
apatheme
# return plot
p
}