-
Notifications
You must be signed in to change notification settings - Fork 103
/
fviz_ca.R
240 lines (227 loc) · 10.7 KB
/
fviz_ca.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
#' @include utilities.R
NULL
#'Visualize Correspondence Analysis
#'
#'@description Correspondence analysis (CA) is an extension of Principal
#' Component Analysis (PCA) suited to analyze frequencies formed by two
#' categorical variables. fviz_ca() provides ggplot2-based elegant
#' visualization of CA outputs from the R functions: CA [in FactoMineR], ca [in
#' ca], coa [in ade4], correspondence [in MASS] and expOutput/epCA [in
#' ExPosition]. Read more:
#' \href{http://www.sthda.com/english/wiki/correspondence-analysis-in-r-the-ultimate-guide-for-the-analysis-the-visualization-and-the-interpretation-r-software-and-data-mining}{Correspondence
#' Analysis}
#'
#' \itemize{ \item{fviz_ca_row(): Graph of row variables} \item{fviz_ca_col():
#' Graph of column variables} \item{fviz_ca_biplot(): Biplot of row and column
#' variables} \item{fviz_ca(): An alias of fviz_ca_biplot()} }
#'
#'@param X an object of class CA [FactoMineR], ca [ca], coa [ade4];
#' correspondence [MASS] and expOutput/epCA [ExPosition].
#'@param axes a numeric vector of length 2 specifying the dimensions to be
#' plotted.
#'@param shape.row,shape.col the point shapes to be used for row/column
#' variables. Default values are 19 for rows and 17 for columns.
#'@param geom a character specifying the geometry to be used for the graph.
#' Allowed values are the combination of c("point", "arrow", "text"). Use
#' "point" (to show only points); "text" to show only labels; c("point",
#' "text") or c("arrow", "text") to show both types.
#' @param geom.row,geom.col as \code{geom} but for row and column elements,
#' respectively. Default is geom.row = c("point", "text), geom.col =
#' c("point", "text").
#'@param label a character vector specifying the elements to be labelled.
#' Default value is "all". Allowed values are "none" or the combination of
#' c("row", "row.sup", "col", "col.sup"). Use "col" to label only active column
#' variables; "col.sup" to label only supplementary columns; etc
#'@param invisible a character value specifying the elements to be hidden on the
#' plot. Default value is "none". Allowed values are the combination of
#' c("row", "row.sup","col", "col.sup").
#'@param title the title of the graph
#'@param col.col,col.row color for column/row points. The default values are
#' "red" and "blue", respectively. Can be a continuous variable or a factor
#' variable. Allowed values include also : "cos2", "contrib", "coord", "x" or
#' "y". In this case, the colors for row/column variables are automatically
#' controlled by their qualities ("cos2"), contributions ("contrib"),
#' coordinates (x^2 + y^2, "coord"), x values("x") or y values("y")
#'@param alpha.col,alpha.row controls the transparency of colors. The value can
#' variate from 0 (total transparency) to 1 (no transparency). Default value is
#' 1. Allowed values include also : "cos2", "contrib", "coord", "x" or "y" as
#' for the arguments col.col and col.row.
#'@param col.col.sup,col.row.sup colors for the supplementary column and row
#' points, respectively.
#'@param repel a boolean, whether to use ggrepel to avoid overplotting text
#' labels or not.
#'@param select.col,select.row a selection of columns/rows to be drawn. Allowed
#' values are NULL or a list containing the arguments name, cos2 or contrib:
#' \itemize{ \item name is a character vector containing column/row names to be
#' drawn \item cos2 if cos2 is in [0, 1], ex: 0.6, then columns/rows with a
#' cos2 > 0.6 are drawn. if cos2 > 1, ex: 5, then the top 5 columns/rows with
#' the highest cos2 are drawn. \item contrib if contrib > 1, ex: 5, then the
#' top 5 columns/rows with the highest contrib are drawn }
#'@param map character string specifying the map type. Allowed options include:
#' "symmetric", "rowprincipal", "colprincipal", "symbiplot", "rowgab",
#' "colgab", "rowgreen" and "colgreen". See details
#'@param arrows Vector of two logicals specifying if the plot should contain
#' points (FALSE, default) or arrows (TRUE). First value sets the rows and the
#' second value sets the columns.
#'@param ... Additional arguments. \itemize{ \item in fviz_ca_row() and
#' fviz_ca_col(): Additional arguments are passed to the functions fviz() and
#' ggpubr::ggpar(). \item in fviz_ca_biplot() and fviz_ca(): Additional
#' arguments are passed to fviz_ca_row() and fviz_ca_col().}
#'@details The default plot of (M)CA is a "symmetric" plot in which both rows
#' and columns are in principal coordinates. In this situation, it's not
#' possible to interpret the distance between row points and column points. To
#' overcome this problem, the simplest way is to make an asymmetric plot. This
#' means that, the column profiles must be presented in row space or
#' vice-versa. The allowed options for the argument map are: \itemize{ \item
#' "rowprincipal" or "colprincipal": asymmetric plots with either rows in
#' principal coordinates and columns in standard coordinates, or vice versa.
#' These plots preserve row metric or column metric respectively. \item
#' "symbiplot": Both rows and columns are scaled to have variances equal to the
#' singular values (square roots of eigenvalues), which gives a symmetric
#' biplot but does not preserve row or column metrics. \item "rowgab" or
#' "colgab": Asymmetric maps, proposed by Gabriel & Odoroff (1990), with rows
#' (respectively, columns) in principal coordinates and columns (respectively,
#' rows) in standard coordinates multiplied by the mass of the corresponding
#' point. \item "rowgreen" or "colgreen": The so-called contribution biplots
#' showing visually the most contributing points (Greenacre 2006b). These are
#' similar to "rowgab" and "colgab" except that the points in standard
#' coordinates are multiplied by the square root of the corresponding masses,
#' giving reconstructions of the standardized residuals. }
#'@return a ggplot
#'@author Alboukadel Kassambara \email{alboukadel.kassambara@@gmail.com}
#'@seealso \code{\link{get_ca}}, \code{\link{fviz_pca}}, \code{\link{fviz_mca}}
#'@references http://www.sthda.com
#' @examples
#' # Correspondence Analysis
#' # ++++++++++++++++++++++++++++++
#' # Install and load FactoMineR to compute CA
#' # install.packages("FactoMineR")
#'
#' library("FactoMineR")
#' data(housetasks)
#' head(housetasks)
#' res.ca <- CA(housetasks, graph=FALSE)
#'
#' # Biplot of rows and columns
#' # ++++++++++++++++++++++++++
#' # Symetric Biplot of rows and columns
#' fviz_ca_biplot(res.ca)
#'
#' # Asymetric biplot, use arrows for columns
#' fviz_ca_biplot(res.ca, map ="rowprincipal",
#' arrow = c(FALSE, TRUE))
#'
#' # Keep only the labels for row points
#' fviz_ca_biplot(res.ca, label ="row")
#'
#' # Keep only labels for column points
#' fviz_ca_biplot(res.ca, label ="col")
#'
#'
#' # Select the top 7 contributing rows
#' # And the top 3 columns
#' fviz_ca_biplot(res.ca,
#' select.row = list(contrib = 7),
#' select.col = list(contrib = 3))
#'
#' # Graph of row variables
#' # +++++++++++++++++++++
#'
#' # Control automatically the color of row points
#' # using the "cos2" or the contributions "contrib"
#' # cos2 = the quality of the rows on the factor map
#' # Change gradient color
#' # Use repel = TRUE to avoid overplotting (slow if many points)
#' fviz_ca_row(res.ca, col.row = "cos2",
#' gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"),
#' repel = TRUE)
#'
#' # You can also control the transparency
#' # of the color by the "cos2" or "contrib"
#' fviz_ca_row(res.ca, alpha.row="contrib")
#'
#' # Select and visualize some rows with select.row argument.
#' # - Rows with cos2 >= 0.5: select.row = list(cos2 = 0.5)
#' # - Top 7 rows according to the cos2: select.row = list(cos2 = 7)
#' # - Top 7 contributing rows: select.row = list(contrib = 7)
#' # - Select rows by names: select.row = list(name = c("Breakfeast", "Repairs", "Holidays"))
#'
#' # Example: Select the top 7 contributing rows
#' fviz_ca_row(res.ca, select.row = list(contrib = 7))
#'
#'
#' # Graph of column points
#' # ++++++++++++++++++++++++++++
#'
#'
#' # Control colors using their contributions
#' fviz_ca_col(res.ca, col.col = "contrib",
#' gradient.cols = c("#00AFBB", "#E7B800", "#FC4E07"))
#'
#' # Select columns with select.col argument
#' # You can select by contrib, cos2 and name
#' # as previously described for ind
#' # Select the top 3 contributing columns
#' fviz_ca_col(res.ca, select.col = list(contrib = 3))
#'
#'
#'
#'@name fviz_ca
#'
#'@rdname fviz_ca
#'@export
fviz_ca_row <-function(X, axes = c(1,2), geom = c("point", "text"), geom.row = geom,
shape.row = 19, col.row ="blue", alpha.row = 1,
col.row.sup="darkblue",
select.row = list(name = NULL, cos2 = NULL, contrib = NULL),
map ="symmetric", repel = FALSE,
...)
{
p <- fviz (X, element = "row", axes = axes, geom = geom.row,
color = col.row, alpha = alpha.row,
pointshape = shape.row, select = select.row,
map = map, repel = repel,
col.row.sup = col.row.sup, shape.sup = shape.row, ...)
p
}
#' @rdname fviz_ca
#' @export
fviz_ca_col <-function(X, axes = c(1,2), shape.col = 17,
geom=c("point", "text"), geom.col = geom,
col.col ="red", col.col.sup="darkred", alpha.col = 1,
select.col = list(name = NULL, cos2 = NULL, contrib = NULL),
map ="symmetric", repel = FALSE,
...)
{
fviz (X, element = "col", axes = axes, geom = geom.col,
color = col.col, alpha = alpha.col,
pointshape = shape.col, select = select.col,
map = map, repel = repel,
colcol.sup = col.col.sup, shape.sup = shape.col, ...)
}
#' @rdname fviz_ca
#' @export
fviz_ca_biplot <-function(X, axes = c(1,2), geom=c("point", "text"),
geom.row = geom, geom.col = geom,
label = "all", invisible="none", arrows = c(FALSE, FALSE),
repel = FALSE, title = "CA - Biplot",
...)
{
# Rows
geom2 <- geom.row
if(arrows[1]==TRUE) geom2 <- setdiff(unique(c(geom2, "arrow")), "point")
p <- fviz_ca_row(X, axes = axes, geom=geom2, repel = repel,
label = label, invisible = invisible, ...)
# Add columns
geom2 <- geom.col
if(arrows[2]==TRUE) geom2 <- setdiff(unique(c(geom2, "arrow")), "point")
p <- fviz_ca_col(X, axes = axes, geom=geom2, repel = repel,
label = label, invisible = invisible,
ggp = p, ...)
p + labs(title=title)
}
#' @rdname fviz_ca
#' @export
fviz_ca <- function(X, ...){
fviz_ca_biplot(X, ...)
}