-
-
Notifications
You must be signed in to change notification settings - Fork 14
/
lfda.R
169 lines (155 loc) · 6.11 KB
/
lfda.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
#' Local Fisher Discriminant Analysis for
#' Supervised Dimensionality Reduction
#'
#' Performs local fisher discriminant analysis (LFDA) on the given data.
#'
#' LFDA is a method for linear dimensionality reduction that maximizes
#' between-class scatter and minimizes within-class scatter while at the
#' same time maintain the local structure of the data so that multimodal
#' data can be embedded appropriately. Its limitation is that it only
#' looks for linear boundaries between clusters. In this case, a non-linear
#' version called kernel LFDA will be used instead. Three metric types can
#' be used if needed.
#'
#' @import rARPACK
#'
#' @export lfda
#'
#' @param x n x d matrix of original samples.
#' n is the number of samples.
#' @param y length n vector of class labels
#' @param r dimensionality of reduced space (default: d)
#' @param metric type of metric in the embedding space (no default)
#' 'weighted' --- weighted eigenvectors
#' 'orthonormalized' --- orthonormalized
#' 'plain' --- raw eigenvectors
#' @param knn parameter used in local scaling method (default: 5)
#'
#' @return list of the LFDA results:
#' \item{T}{d x r transformation matrix (Z = x * T)}
#' \item{Z}{n x r matrix of dimensionality reduced samples}
#'
#' @keywords lfda local fisher discriminant transformation mahalanobis metric
#'
#' @author Yuan Tang
#'
#' @seealso See \code{\link{klfda}} for the kernelized variant of
#' LFDA (Kernel LFDA).
#'
#' @references
#' Sugiyama, M (2007).
#' Dimensionality reduction of multimodal labeled data by
#' local Fisher discriminant analysis.
#' \emph{Journal of Machine Learning Research}, vol.\bold{8}, 1027--1061.
#'
#' Sugiyama, M (2006).
#' Local Fisher discriminant analysis for supervised dimensionality reduction.
#' In W. W. Cohen and A. Moore (Eds.), \emph{Proceedings of 23rd International
#' Conference on Machine Learning (ICML2006)}, 905--912.
#'
#' @import rARPACK
#'
#' @examples
#'
#' k <- iris[, -5]
#' y <- iris[, 5]
#' r <- 3
#' lfda(k, y, r, metric = "plain")
lfda <- function(x, y, r, metric = c("orthonormalized", "plain", "weighted"), knn = 5) {
metric <- match.arg(metric) # the type of the transforming matrix (metric)
x <- t(as.matrix(x)) # transpose of original samples
y <- t(as.matrix(y)) # transpose of original class labels
d <- nrow(x) # number of predictors
n <- ncol(x) # number of samples
if (is.null(r)) r <- d # if no dimension reduction requested, set r to d
tSb <- mat.or.vec(d, d) # initialize between-class scatter matrix (to be maximized)
tSw <- mat.or.vec(d, d) # initialize within-class scatter matrix (to be minimized)
# compute the optimal scatter matrices in a classwise manner
for (i in unique(as.vector(t(y)))) {
Xc <- x[, y == i] # data for this class
nc <- ncol(Xc)
# determine local scaling for locality-preserving projection
Xc2 <- t(as.matrix(colSums(Xc^2)))
# calculate the distance, using a self-defined repmat function that's the same
# as repmat() in Matlab
distance2 <- repmat(Xc2, nc, 1) + repmat(t(Xc2), 1, nc) - 2 * t(Xc) %*% Xc
# Get affinity matrix
A <- getAffinityMatrix(distance2, knn, nc)
Xc1 <- as.matrix(rowSums(Xc))
G <- Xc %*% (repmat(as.matrix(colSums(A)), 1, d) * t(Xc)) - Xc %*% A %*% t(Xc)
tSb <- tSb + (G / n) + Xc %*% t(Xc) * (1 - nc / n) + Xc1 %*% (t(Xc1) / n)
tSw <- tSw + G / nc
}
X1 <- as.matrix(rowSums(x))
tSb <- tSb - X1 %*% t(X1) / n - tSw
tSb <- (tSb + t(tSb)) / 2 # final between-class cluster matrix
tSw <- (tSw + t(tSw)) / 2 # final within-class cluster matrix
# find generalized eigenvalues and normalized eigenvectors of the problem
if (r == d) {
# without dimensionality reduction
eigTmp <- eigen(solve(tSw) %*% tSb) # eigenvectors here are normalized
} else {
# dimensionality reduction (select only the r largest eigenvalues of the problem)
eigTmp <- suppressWarnings(rARPACK::eigs(A = solve(tSw) %*% tSb, k = r, which = "LM")) # r largest magnitude eigenvalues
}
eigVec <- Re(eigTmp$vectors) # the raw transforming matrix
eigVal <- as.matrix(Re(eigTmp$values))
# options to require a particular type of returned transform matrix
# transforming matrix (do not change the "=" in the switch statement)
Tr <- getMetricOfType(metric, eigVec, eigVal, d)
Z <- t(t(Tr) %*% x) # transformed data
out <- list("T" = Tr, "Z" = Z)
class(out) <- "lfda"
return(out)
}
#' LFDA Transformation/Prediction on New Data
#'
#' This function transforms a data set, usually a testing set, using the trained LFDA metric
#' @param object The result from lfda function, which contains a transformed data and a transforming
#' matrix that can be used for transforming testing set
#' @param newdata The data to be transformed
#' @param type The output type, in this case it defaults to "raw" since the output is a matrix
#' @param ... Additional arguments
#' @export
#' @method predict lfda
#' @return the transformed matrix
#' @author Yuan Tang
#'
#' @examples
#'
#' k <- iris[, -5]
#' y <- iris[, 5]
#' r <- 3
#' model <- lfda(k, y, r = 4, metric = "plain")
#' predict(model, iris[, -5])
predict.lfda <- function(object, newdata = NULL, type = "raw", ...) {
if (is.null(newdata)) {
stop("You must provide data to be used for transformation. ")
}
if (type != "raw") {
stop('Types other than "raw" are currently unavailable. ')
}
if (is.data.frame(newdata)) newdata <- as.matrix(newdata)
transformMatrix <- object$T
result <- newdata %*% transformMatrix
result
}
#' Print an lfda object
#'
#' Print an lfda object
#' @param x The result from lfda function, which contains a transformed data and a transforming
#' @param ... ignored
#' @export
#' @importFrom stats cov
#' @importFrom utils head
#' @method print lfda
print.lfda <- function(x, ...) {
cat("Results for Local Fisher Discriminant Analysis \n\n")
cat("The trained transforming matrix is: \n")
print(head(x$T))
cat("\n\n The original dataset after applying this metric transformation is: \n")
print(head(x$Z))
cat("\n")
cat("Only partial output is shown above. Please see the model output for more details. \n")
invisible(x)
}