/
nonlinear_TSNE.R
executable file
·173 lines (166 loc) · 6.86 KB
/
nonlinear_TSNE.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
#' t-distributed Stochastic Neighbor Embedding
#'
#' \eqn{t}-distributed Stochastic Neighbor Embedding (t-SNE) is a variant of Stochastic Neighbor Embedding (SNE)
#' that mimics patterns of probability distributions over pairs of high-dimensional objects on low-dimensional
#' target embedding space by minimizing Kullback-Leibler divergence. While conventional SNE uses Gaussian
#' distributions to measure similarity, t-SNE, as its name suggests, exploits a heavy-tailed Student t-distribution.
#'
#' @param X an \eqn{(n\times p)} matrix or data frame whose rows are observations and columns represent independent variables.
#' @param ndim an integer-valued target dimension.
#' @param perplexity desired level of perplexity; ranging [5,50].
#' @param eta learning parameter.
#' @param maxiter maximum number of iterations.
#' @param jitter level of white noise added at the beginning.
#' @param jitterdecay decay parameter in (0,1). The closer to 0, the faster artificial noise decays.
#' @param momentum level of acceleration in learning.
#' @param pca whether to use PCA as preliminary step; \code{TRUE} for using it, \code{FALSE} otherwise.
#' @param pcascale a logical; \code{FALSE} for using Covariance, \code{TRUE} for using Correlation matrix. See also \code{\link{do.pca}} for more details.
#' @param symmetric a logical; \code{FALSE} to solve it naively, and \code{TRUE} to adopt symmetrization scheme.
#' @param BHuse a logical; \code{TRUE} to use Barnes-Hut approximation. See \code{\link[Rtsne]{Rtsne}} for more details.
#' @param BHtheta speed-accuracy tradeoff. If set as 0.0, it reduces to exact t-SNE.
#'
#' @return a named \code{Rdimtools} S3 object containing
#' \describe{
#' \item{Y}{an \eqn{(n\times ndim)} matrix whose rows are embedded observations.}
#' \item{algorithm}{name of the algorithm.}
#' }
#'
#' @examples
#' \donttest{
#' ## load iris data
#' data(iris)
#' set.seed(100)
#' subid = sample(1:150,50)
#' X = as.matrix(iris[subid,1:4])
#' lab = as.factor(iris[subid,5])
#'
#' ## compare different perplexity
#' out1 <- do.tsne(X, ndim=2, perplexity=5)
#' out2 <- do.tsne(X, ndim=2, perplexity=10)
#' out3 <- do.tsne(X, ndim=2, perplexity=15)
#'
#' ## Visualize three different projections
#' opar <- par(no.readonly=TRUE)
#' par(mfrow=c(1,3))
#' plot(out1$Y, pch=19, col=lab, main="tSNE::perplexity=5")
#' plot(out2$Y, pch=19, col=lab, main="tSNE::perplexity=10")
#' plot(out3$Y, pch=19, col=lab, main="tSNE::perplexity=15")
#' par(opar)
#' }
#'
#' @seealso \code{\link{do.sne}}
#' @references
#' \insertRef{vandermaaten_visualizing_2008}{Rdimtools}
#'
#' @author Kisung You
#' @rdname nonlinear_TSNE
#' @concept nonlinear_methods
#' @export
do.tsne <- function(X, ndim=2, perplexity=30, eta=0.05, maxiter=2000,
                    jitter=0.3, jitterdecay=0.99, momentum=0.5,
                    pca=TRUE, pcascale=FALSE, symmetric=FALSE,
                    BHuse=TRUE, BHtheta=0.25){
  # NOTE(review): 'symmetric' is documented and accepted but never referenced
  # in this body — presumably a leftover from an older solver; confirm before
  # removing it from the signature (kept for backward compatibility).

  # 1. typecheck is always first step to perform.
  pcaratio = 0.90   # fixed variance-ratio bound validated below for the PCA step
  aux.typecheck(X)
  # 1-1. (integer) ndim : target dimension, must lie in [1, ncol(X)]
  if (!is.numeric(ndim) || (ndim < 1) || (ndim > ncol(X))){
    stop("* do.tsne : 'ndim' is an integer in [1,#(covariates)].")
  }
  ndim = as.integer(ndim)
  # 1-2. perplexity : any positive real is accepted; values outside the
  #      customary [5,50] range only trigger an advisory message, not an error.
  if (!is.numeric(perplexity) || is.na(perplexity) || is.infinite(perplexity) || (perplexity <= 0)){
    stop("* do.tsne : perplexity should be a positive real number.")
  }
  if ((perplexity < 5) || (perplexity > 50)){
    message("* do.tsne : a desired perplexity value is in [5,50].")
  }
  # coerce Barnes-Hut controls up front; validated in step 2-8.
  BarnesHut = as.logical(BHuse)
  BHtheta   = as.double(BHtheta)

  # 2. Input Parameters
  # 2-1. (double) eta : learning rate, strictly positive
  if (!is.numeric(eta) || is.na(eta) || is.infinite(eta) || (eta <= 0)){
    stop("* do.tsne : learning rate 'eta' should be a positive real number.")
  }
  # 2-2. (integer) maxiter : at least 2 iterations
  if (!is.numeric(maxiter) || (maxiter < 2) || (is.na(maxiter)) || (is.infinite(maxiter))){
    stop("* do.tsne : maxiter should be suited for the number of iterations.")
  }
  # 2-3. (double) jitter : nonnegative white-noise level
  if (!is.numeric(jitter) || (is.na(jitter)) || (is.infinite(jitter)) || (jitter < 0)){
    stop("* do.tsne : 'jitter' should be a positive real number.")
  }
  # 2-4. (double) jitterdecay : multiplicative decay, open interval (0,1)
  decay = jitterdecay
  if (!is.numeric(decay) || (is.na(decay)) || (is.infinite(decay)) || (decay <= 0) || (decay >= 1)){
    stop("* do.tsne : 'jitterdecay' is a real number between (0,1).")
  }
  # 2-5. (double) momentum : strictly positive acceleration term
  if ((!is.numeric(momentum)) || (is.na(momentum)) || (is.infinite(momentum)) || (momentum <= 0)){
    stop("* do.tsne : 'momentum' should be a positive real number.")
  }
  # 2-6. preprocessing hook intentionally disabled (kept for reference):
  # algpreprocess = match.arg(preprocess)
  # tmplist = aux.preprocess.hidden(X,type=algpreprocess,algtype="nonlinear")
  # trfinfo = tmplist$info
  # pX = tmplist$pX

  # 2-7. (bool) pca : optional PCA pre-reduction to roughly half the ambient
  #      dimension; falls back to the raw data when PCA leaves <= ndim columns.
  pcaflag = pca
  if (!is.logical(pcaflag)){
    stop("* do.tsne : 'pca' is a logical variable.")
  }
  if (!is.numeric(pcaratio) || (pcaratio <= 0) || (pcaratio >= 1) || is.na(pcaratio)){
    stop("* do.tsne : pcaratio should be in (0,1).")
  }
  scaleflag = pcascale
  if (!is.logical(scaleflag)){
    stop("* do.tsne : pcascale is either TRUE or FALSE.")
  }
  if (pcaflag){
    pcadim = ceiling((ncol(X) + ndim)/2)
    pcaout = do.pca(X, ndim=pcadim, cor=scaleflag)
    if (ncol(pcaout$Y) <= ndim){
      message("* do.tsne : PCA scaling has gone too far.")
      message("* do.tsne : Pass non-scaled data to t-SNE algorithm.")
      tpX = t(X)
    } else {
      tpX = t(pcaout$Y)
    }
  } else {
    tpX = t(X)
  }
  # 2-8. (bool) BarnesHut + (double) BHtheta >= 0
  #      BHtheta == 0 reduces to exact t-SNE (handled by the approximate solver).
  BHflag = BarnesHut
  if (!is.logical(BHflag)){
    stop("* do.tsne : 'BarnesHut' is a logical variable.")
  }
  if (!is.numeric(BHtheta) || is.na(BHtheta) || (BHtheta < 0) || is.infinite(BHtheta)){
    stop("* do.tsne : BHtheta is invalid. It should be >= 0.")
  }
  BHtheta = as.double(BHtheta)

  # 3. Run Main Algorithm
  if (!BHflag){
    # exact t-SNE on the (possibly PCA-reduced) data via internal C++ routines
    Perp = aux_perplexity(tpX, perplexity)
    P    = as.matrix(Perp$P)
    Y    = t(as.matrix(method_tsne(P, ndim, eta, maxiter, jitter, decay, momentum)))
  } else {
    # Barnes-Hut approximation delegated to maotai's hidden solver.
    pX = t(tpX)
    # BUGFIX: distances were previously computed on the raw input 'X',
    # silently ignoring the PCA preprocessing above; use 'pX' instead so
    # both branches operate on the same (possibly reduced) data.
    dX   = stats::dist(pX)
    dfun = utils::getFromNamespace("hidden_tsne", "maotai")
    out  = dfun(dX, ndim=round(ndim), theta=BHtheta, perplexity=perplexity,
                pca=FALSE, max_iter=maxiter, momentum=momentum, eta=eta)
    Y = out$embed
  }

  # 4. wrap and return; reject degenerate embeddings
  if (any(is.infinite(Y)) || any(is.na(Y))){
    stop("* do.tsne : t-SNE not successful; having either Inf or NA values.")
  }
  result = list()
  result$Y = Y
  result$algorithm = "nonlinear:TSNE"
  return(structure(result, class="Rdimtools"))
}