-
Notifications
You must be signed in to change notification settings - Fork 10
/
linear_NPE.R
executable file
·161 lines (147 loc) · 5.97 KB
/
linear_NPE.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
#' Neighborhood Preserving Embedding
#'
#' \code{do.npe} performs a linear dimensionality reduction using Neighborhood Preserving
#' Embedding (NPE) proposed by He et al (2005). It can be regarded as a linear approximation
#' to Locally Linear Embedding (LLE). Like LLE, it is possible for the weight matrix being rank deficient.
#' If \code{regtype} is set to \code{TRUE} with a proper value of \code{regparam}, it will
#' perform Tikhonov regularization as designated. When regularization is needed
#' with \code{regtype} parameter to be \code{FALSE}, it will automatically find a suitable
#' regularization parameter and put penalty for stable computation. See also
#' \code{\link{do.lle}} for more details.
#'
#' @param X an \eqn{(n\times p)} matrix or data frame whose rows are observations
#' and columns represent independent variables.
#' @param ndim an integer-valued target dimension.
#' @param type a vector of neighborhood graph construction. Following types are supported;
#' \code{c("knn",k)}, \code{c("enn",radius)}, and \code{c("proportion",ratio)}.
#' Default is \code{c("proportion",0.1)}, connecting about 1/10 of nearest data points
#' among all data points. See also \code{\link{aux.graphnbd}} for more details.
#' @param symmetric one of \code{"intersect"}, \code{"union"} or \code{"asymmetric"} is supported. Default is \code{"union"}. See also \code{\link{aux.graphnbd}} for more details.
#' @param weight \code{TRUE} to perform NPE on weighted graph, or \code{FALSE} otherwise.
#' @param preprocess an additional option for preprocessing the data.
#' Default is "null". See also \code{\link{aux.preprocess}} for more details.
#' @param regtype \code{FALSE} for not applying automatic Tikhonov Regularization,
#' or \code{TRUE} otherwise.
#' @param regparam a positive real number for Regularization. Default value is 1.
#'
#' @return a named list containing
#' \describe{
#' \item{Y}{an \eqn{(n\times ndim)} matrix whose rows are embedded observations.}
#' \item{eigval}{a vector of eigenvalues corresponding to basis expansion in an ascending order.}
#' \item{projection}{a \eqn{(p\times ndim)} whose columns are basis for projection.}
#' \item{trfinfo}{a list containing information for out-of-sample prediction.}
#' }
#'
#' @examples
#' \dontrun{
#' ## use iris data
#' data(iris)
#' set.seed(100)
#' subid = sample(1:150, 50)
#' X = as.matrix(iris[subid,1:4])
#' label = as.factor(iris[subid,5])
#'
#' ## use different settings for connectivity
#' output1 = do.npe(X, ndim=2, type=c("proportion",0.10))
#' output2 = do.npe(X, ndim=2, type=c("proportion",0.25))
#' output3 = do.npe(X, ndim=2, type=c("proportion",0.50))
#'
#' ## visualize three different projections
#' opar <- par(no.readonly=TRUE)
#' par(mfrow=c(1,3))
#' plot(output1$Y, pch=19, col=label, main="NPE::10% connected")
#' plot(output2$Y, pch=19, col=label, main="NPE::25% connected")
#' plot(output3$Y, pch=19, col=label, main="NPE::50% connected")
#' par(opar)
#'}
#'
#' @references
#' \insertRef{he_neighborhood_2005}{Rdimtools}
#'
#' @author Kisung You
#' @rdname linear_NPE
#' @concept linear_methods
#' @export
do.npe <- function(X, ndim=2, type=c("proportion",0.1), symmetric="union" ,weight=TRUE,
preprocess=c("null","center","scale","cscale","whiten","decorrelate"),
regtype=FALSE, regparam=1){
# 1. typecheck is always first step to perform.
aux.typecheck(X)
if ((!is.numeric(ndim))||(ndim<1)||(ndim>ncol(X))||is.infinite(ndim)||is.na(ndim)){
stop("* do.npe : 'ndim' is a positive integer in [1,#(covariates)].")
}
ndim = as.integer(ndim)
# 2. ... parameters
# 2-1. aux.graphnbd
# type : vector of c("knn",k), c("enn",radius), or c("proportion",ratio)
# symmetric : 'intersect','union', or 'asymmetric'
# 2-2. NPE itself
# weight : TRUE
# preprocess : 'null', 'center','decorrelate', or 'whiten'
# regtype : FALSE (default; need a value) / TRUE
# regparam : 1 (default)
nbdtype = type
nbdsymmetric = symmetric
algweight = weight
if (missing(preprocess)){
algpreprocess = "null"
} else {
algpreprocess = match.arg(preprocess)
}
if (!is.logical(regtype)){stop("* do.npe : 'regtype' should be a logical variable.")}
if (!is.numeric(regparam)||is.na(regparam)||is.infinite(regparam)||(regparam<=0)){
stop("* do.npe : 'regparam' should be a positive real-valued number; it is a Tikhonov Regularization Factor.")}
# regtype : FALSE (default; need a value) / TRUE
# regparam : 1 (default)
# 3. process : data preprocessing
tmplist = aux.preprocess.hidden(X,type=algpreprocess,algtype="linear")
trfinfo = tmplist$info
pX = tmplist$pX
n = nrow(pX)
p = ncol(pX)
# 4. process : neighborhood selection
nbdstruct = aux.graphnbd(pX,method="euclidean",
type=nbdtype,symmetric=nbdsymmetric)
# 5. main 1 : compute Weights
# k = max(apply(nbdstruct$mask,2,function(x) sum(as.double((x==TRUE)))))
W = array(0,c(n,n))
if (regtype==TRUE){
regvals = array(0,c(1,n))
}
for (i in 1:n){
# 5-1. separate target mask vector
tgtmask = nbdstruct$mask[i,]
tgtidx = which(tgtmask==TRUE)
# 5-2. select data
# For convenience, target matrix is transposed for Armadillo
vec_tgt = pX[i,]
mat_tgt = t(pX[tgtidx,])
k = ncol(mat_tgt)
# 5-3. compute with regularization
# 5-3-1. No Automatic Regularization
if (regtype==FALSE){
w = method_lleW(mat_tgt,vec_tgt,regparam);
} else {
# 5-3-2. Automatic Regularization
outW = method_lleWauto(mat_tgt,vec_tgt);
w = outW$w
regvals[i] = outW$regparam
}
W[i,tgtidx] = w;
}
# 6. main computation
tpX = t(pX)
output = method_npe(tpX,W);
eigvals = output$eigval
eigvecs = output$eigvec
# 7. return output
# 1. adjust projection
projector = aux.adjprojection(eigvecs[,1:ndim])
result = list()
result$Y = pX %*% projector
result$eigval = eigvals
result$projection = projector
trfinfo$algtype = "linear"
result$trfinfo = trfinfo
return(result)
}