-
Notifications
You must be signed in to change notification settings - Fork 54
/
neatsort.R
executable file
·214 lines (182 loc) · 7.07 KB
/
neatsort.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
#' @title Neatmap Sorting
#' @description Sort samples or features based on the neatmap approach.
#' @param x \code{\link{phyloseq-class}} object or a matrix
#' @param target For \code{\link{phyloseq-class}} input, the target is either
#' 'sites' (samples) or 'species' (features) (taxa/OTUs); for matrices,
#' the target is 'rows' or 'cols'.
#' @param method Ordination method. See \code{\link{ordinate}}
#' from \pkg{phyloseq} package.
#' For matrices, only the NMDS method is available.
#' @param distance Distance method. See \code{\link{ordinate}}
#' from \pkg{phyloseq} package.
#' @param first Optionally provide the name of the first sample/taxon to
#' start the ordering (the ordering is cyclic so we can start at any
#' point). The choice of the first sample may somewhat affect the
#' overall ordering.
#' @param ... Arguments to be passed.
#' @return Vector of ordered elements
#' @export
#' @examples
#' data(peerj32)
#' pseq <- peerj32$phyloseq
#' # For Phyloseq
#' sort.otu <- neatsort(pseq, target='species')
#' # For matrix
#' # sort.rows <- neatsort(abundances(pseq), target='rows')
#'
#' @references This function is partially based on code derived from the
#' \pkg{phyloseq} package. For the original
#' neatmap approach for heatmap sorting, see (and cite):
#' Rajaram, S., & Oono, Y. (2010). NeatMap--non-clustering heat map
#' alternatives in R. BMC Bioinformatics, 11, 45.
#'
#' @details This function borrows elements from the heatmap implementation in
#' the \pkg{phyloseq} package. The row/column sorting is there not available
#' as a separate function at present, however, hindering reuse in other tools.
#' Implemented in the microbiome package to provide an independent method for
#' easy sample/taxon reordering for phyloseq objects.
#' @keywords utilities
neatsort <- function(x, target, method="NMDS", distance="bray",
first=NULL, ...) {
# TODO harmonize completely for matrices vs phyloseqs
xo <- x
# Remame matrix targets as in phyloseq
target <- gsub("columns", "sites", target)
target <- gsub("cols", "sites", target)
target <- gsub("rows", "species", target)
target <- gsub("samples", "sites", target)
target <- gsub("features", "species", target)
if (length(is(x)) == 1 && is(x) %in% c("phyloseq", "otu_table")) {
samples <- sample_names(x)
features <- taxa(x)
# Capture the output to keep the screen clean
junk <- capture.output(
ord <- ordinate(x, method = method, distance = distance, ...),
file=NULL)
} else {
samples <- colnames(x)
features <- rownames(x)
if (!method == "NMDS") {
warning("Only NMDS dissimilarity implemented for matrices.
Using NMDS for sorting.")
method <- "NMDS"
}
if (target == "sites") {
x <- t(x)
}
# Neatmap sorting for matrices with NMDS Order Capture the output
# to keep the screen clean
if (distance %in% c("euclidean")) {
d <- dist(x, distance)
} else {
d <- vegdist(x, distance)
}
junk <- capture.output(ord <- metaMDS(d, wascores=FALSE,
autotransform=FALSE, noshare=FALSE), file=NULL)
}
# ------------------------------------------------------------------
# Order items with the NeatMap procedure Reorder by the angle in radial
# coordinates on the 2-axis plane.
DF <- NULL
# Define new sample ordering based on the ordination Quick fix: the scores
# function fails otherwise.
disp.target <- target
if (target == "species" && !is.phyloseq(xo)) {
disp.target <- "sites"
}
tmp <- try({
DF <- scores(ord, choices=c(1, 2), display=disp.target)
}, silent=TRUE)
if (inherits(tmp, "try-error")) {
warning(paste("Order failed with ", target, ".
Using default ordering.",
sep=""))
}
if (!is.null(DF)) {
# If the score accession worked, replace order
if (target == "sites") {
ordering <- samples[order(radial_theta(DF))]
} else if (target == "species") {
ordering <- features[order(radial_theta(DF))]
} else {
stop("Target should be either sites, cols, species or rows")
}
} else if (length(DF) > 1) {
if (target == "sites") {
ordering <- samples[order(DF)] # 1:nsamples(x)
} else if (target == "species") {
ordering <- features[order(DF)] # 1:ntaxa(x)
} else {
stop("Target should be either sites or species")
}
} else {
if (target == "sites") {
ordering <- samples
} else if (target == "species") {
ordering <- features
} else {
stop("Target should be either sites or species")
}
}
# Determine the starting item (OTU or sample)
if (!is.null(first)) {
ordering <- chunk_reorder(ordering, first)
}
ordering
}
#' @title Radial Theta Function
#' @description Adapted from \pkg{NeatMap} and \pkg{phyloseq} packages
#' but not exported and hence not available via phyloseq. Completely
#' rewritten to avoid license conflicts. Vectorized to gain efficiency;
#' only calculates theta and omits r.
#' @param x position parameter
#' @return theta
#' @keywords internal
radial_theta <- function(x) {
x <- as(x, "matrix")
theta <- atan2((x[, 2] - mean(x[, 2])), (x[, 1] - mean(x[, 1])))
names(theta) <- rownames(x)
theta
}
#' @title Chunk Reorder
#' @description Chunk re-order a vector so that specified newstart is first.
#' Different than relevel.
#' @keywords internal
#' @details Borrowed from \pkg{phyloseq} package as needed here and not
#' exported there. Rewritten.
#' @return Reordered x
#' @examples
#' # Typical use-case
#' # chunk_reorder(1:10, 5)
#' # # Default is to not modify the vector
#' # chunk_reorder(1:10)
#' # # Another example not starting at 1
#' # chunk_reorder(10:25, 22)
#' # # Should silently ignore the second element of `newstart`
#' # chunk_reorder(10:25, c(22, 11))
#' # # Should be able to handle `newstart` being the first argument already
#' # # without duplicating the first element at the end of `x`
#' # chunk_reorder(10:25, 10)
#' # all(chunk_reorder(10:25, 10) == 10:25)
#' # # This is also the default
#' # all(chunk_reorder(10:25) == 10:25)
#' # # An example with characters
#' # chunk_reorder(LETTERS, 'G')
#' # chunk_reorder(LETTERS, 'B')
#' # chunk_reorder(LETTERS, 'Z')
#' # # What about when `newstart` is not in `x`? Return x as-is, throw warning.
#' # chunk_reorder(LETTERS, 'g')
chunk_reorder <- function(x, newstart=x[[1]]) {
pivot <- match(newstart[1], x, nomatch=NA)
# If pivot is NA, then warn and return x as-is
if (is.na(pivot)) {
warning("`newstart` argument not found from `x`.
- returning `x` with no reordering.")
newx <- x
} else {
newx <- c(tail(x, {
length(x) - pivot + 1
}), head(x, pivot - 1L))
}
newx
}