-
Notifications
You must be signed in to change notification settings - Fork 10
/
linear_LDP.R
executable file
·144 lines (140 loc) · 5.02 KB
/
linear_LDP.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
#' Locally Discriminating Projection
#'
#' Locally Discriminating Projection (LDP) is a supervised linear dimension reduction method.
#' It utilizes both label/class information and local neighborhood information to discover
#' the intrinsic structure of the data. It can be considered as an extension
#' of LPP in a supervised manner.
#'
#' @examples
#' ## generate data of 3 types with clear difference
#' dt1 = aux.gensamples(n=20)-100
#' dt2 = aux.gensamples(n=20)
#' dt3 = aux.gensamples(n=20)+100
#'
#' ## merge the data and create a label correspondingly
#' X = rbind(dt1,dt2,dt3)
#' label = rep(1:3, each=20)
#'
#' ## try different neighborhood sizes
#' out1 = do.ldp(X, label, type=c("proportion",0.10))
#' out2 = do.ldp(X, label, type=c("proportion",0.25))
#' out3 = do.ldp(X, label, type=c("proportion",0.50))
#'
#' ## visualize
#' opar <- par(no.readonly=TRUE)
#' par(mfrow=c(1,3))
#' plot(out1$Y, col=label, pch=19, main="10% connectivity")
#' plot(out2$Y, col=label, pch=19, main="25% connectivity")
#' plot(out3$Y, col=label, pch=19, main="50% connectivity")
#' par(opar)
#'
#' @param X an \eqn{(n\times p)} matrix or data frame whose rows are observations
#' and columns represent independent variables.
#' @param label a length-\eqn{n} vector of data class labels.
#' @param ndim an integer-valued target dimension.
#' @param type a vector of neighborhood graph construction. Following types are supported;
#' \code{c("knn",k)}, \code{c("enn",radius)}, and \code{c("proportion",ratio)}.
#' Default is \code{c("proportion",0.1)}, connecting about 1/10 of nearest data points
#' among all data points. See also \code{\link{aux.graphnbd}} for more details.
#' @param preprocess an additional option for preprocessing the data.
#' Default is "center". See also \code{\link{aux.preprocess}} for more details.
#' @param beta bandwidth parameter for heat kernel in \eqn{(0,\infty)}.
#'
#' @return a named list containing
#' \describe{
#' \item{Y}{an \eqn{(n\times ndim)} matrix whose rows are embedded observations.}
#' \item{trfinfo}{a list containing information for out-of-sample prediction.}
#' \item{projection}{a \eqn{(p\times ndim)} matrix whose columns are basis for projection.}
#' }
#'
#' @references
#' \insertRef{zhao_local_2006}{Rdimtools}
#'
#' @author Kisung You
#' @rdname linear_LDP
#' @concept linear_methods
#' @export
do.ldp <- function(X, label, ndim=2, type=c("proportion",0.1),
                   preprocess=c("center","scale","cscale","decorrelate","whiten"), beta=10.0){
  #------------------------------------------------------------------------
  ## PREPROCESSING
  # 1. data matrix : validate and record dimensions
  aux.typecheck(X)
  n = nrow(X)
  p = ncol(X)
  # 2. label information : coerce to integer codes, then sanity-check
  label  = as.numeric(as.factor(label))
  ulabel = unique(label)
  K      = length(ulabel)
  if (K==1){
    stop("* do.ldp : 'label' should have at least 2 unique labelings.")
  }
  if (K==n){
    stop("* do.ldp : given 'label' has all unique elements.")
  }
  if (any(is.na(label))||(any(is.infinite(label)))){
    stop("* Supervised Learning : any element of 'label' as NA or Inf will simply be considered as a class, not missing entries.")
  }
  # 3. ndim : target dimension must lie in [1, p]
  ndim = as.integer(ndim)
  if (!check_ndim(ndim,p)){
    stop("* do.ldp : 'ndim' is a positive integer in [1,#(covariates)].")
  }
  # 4. type : neighborhood rule; "union" symmetrization keeps the graph undirected
  nbdtype      = type
  nbdsymmetric = "union"
  # 5. preprocess
  if (missing(preprocess)){
    algpreprocess = "center"
  } else {
    algpreprocess = match.arg(preprocess)
  }
  # 6. beta : heat-kernel bandwidth, strictly positive
  beta = as.double(beta)
  if (!check_NumMM(beta,0,Inf,compact=FALSE)){
    stop("* do.ldp : 'beta' is a bandwidth parameter in (0,Inf).")
  }

  #------------------------------------------------------------------------
  ## COMPUTATION : PRELIMINARY
  # 1. preprocessing of data : note that output pX still has (n-by-p) format
  tmplist = aux.preprocess.hidden(X,type=algpreprocess,algtype="linear")
  trfinfo = tmplist$info
  pX      = tmplist$pX
  # 2. build neighborhood information (logical adjacency mask, symmetric by "union")
  nbdstruct = aux.graphnbd(pX,method="euclidean",
                           type=nbdtype,symmetric=nbdsymmetric)
  nbdmask   = nbdstruct$mask

  #------------------------------------------------------------------------
  ## COMPUTATION : MAIN FOR LOCALLY DISCRIMINATING PROJECTION
  # 1. build W : vectorized form of the pairwise rule (replaces an O(n^2) double loop).
  #    With heat-kernel affinity alpha_ij = exp(-|x_i - x_j|^2 / beta), connected pairs get
  #      same class      : alpha*(1+alpha)  (reinforce within-class closeness)
  #      different class : alpha*(1-alpha)  (attenuate between-class ties)
  #    and non-neighbor pairs get 0.
  Dsqmat  = exp(-(as.matrix(dist(pX))^2)/beta)
  signmat = ifelse(outer(label, label, "=="), 1.0, -1.0)   # +1 same class, -1 otherwise
  W       = (Dsqmat*(1 + signmat*Dsqmat))*nbdmask          # logical mask coerces to 0/1
  diag(W) = 0                                              # no self-loops
  # 2. D and L : degree matrix and unnormalized graph Laplacian
  D = diag(rowSums(W))
  L = D-W
  # 3. cost function and geigen, BOTTOM solutions : minimize tr(V' X'LX V) s.t. V' X'DX V = I
  LHS = t(pX)%*%L%*%pX
  RHS = t(pX)%*%D%*%pX
  projection = aux.geigen(LHS, RHS, ndim, maximal=FALSE)

  #------------------------------------------------------------------------
  ## RETURN
  result = list()
  result$Y          = pX%*%projection
  result$trfinfo    = trfinfo
  result$projection = projection
  return(result)
}