-
Notifications
You must be signed in to change notification settings - Fork 16
/
trajectory_inference.R
187 lines (170 loc) · 6.05 KB
/
trajectory_inference.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
#' @title Infer an initial trajectory through space
#'
#' @description \code{infer_initial_trajectory} infers an initial trajectory for
#' \code{\link{infer_trajectory}} by clustering the points and calculating
#' the shortest path through cluster centers. The shortest path takes into account
#' the euclidean distance between cluster centers, and the density between those two
#' points.
#'
#' @param space A numeric matrix or a data frame containing the coordinates of samples.
#' @param k The number of clusters
#'
#' @return the initial trajectory obtained by this method
#'
#' @importFrom RANN nn2
#' @importFrom TSP TSP insert_dummy solve_TSP
#' @importFrom stats kmeans dist
#' @importFrom dynutils scale_minmax add_class
infer_initial_trajectory <- function(space, k) {
# input checks
check_numeric_matrix(space, "space", finite = TRUE)
check_numeric_vector(k, "k", whole = TRUE, finite = TRUE, range = c(1, nrow(space) - 1), length = 1)
# cluster space into max k clusters
fit <- stats::kmeans(space, k)
centers <- fit$centers
# calculate the euclidean space between clusters
eucl_dist <- as.matrix(stats::dist(centers))
# calculate the densities along the straight lines between any two cluster centers
i <- j <- NULL # satisfy r cmd check
pts <-
crossing(
i = seq_len(k),
j = seq_len(k),
pct = seq(0, 1, length.out = 21)
) %>%
filter(i < j)
pts_space <- (1 - pts$pct) * centers[pts$i, ] + pts$pct * centers[pts$j, ]
pts$dist <- rowMeans(RANN::nn2(space, pts_space, k = 10)$nn.dist)
dendis <- pts %>% group_by(i, j) %>% summarise(dist = mean(dist)) %>% ungroup()
density_dist <- matrix(0, nrow = k, ncol = k)
density_dist[cbind(dendis$i, dendis$j)] <- dendis$dist
density_dist[cbind(dendis$j, dendis$i)] <- dendis$dist
# combine both distance matrices
cluster_distances <- eucl_dist * density_dist
# find the shortest path through all clusters
tsp <- TSP::insert_dummy(TSP::TSP(cluster_distances))
tour <- as.vector(TSP::solve_TSP(tsp))
tour2 <- c(tour, tour)
start <- min(which(tour2 == k + 1))
stop <- max(which(tour2 == k + 1))
best_ord <- tour2[(start + 1):(stop - 1)]
# use this ordering as the initial curve
init_traj <- centers[best_ord, , drop = FALSE]
init_traj
}
#' Infer linear trajectory through space
#'
#' \code{infer_trajectory} infers a trajectory through samples in a given space in a four-step process:
#' \enumerate{
#' \item Perform \emph{k}-means clustering
#' \item Calculate distance matrix between cluster centers using a custom distance function
#' \item Find the shortest path connecting all cluster centers using the custom distance matrix
#' \item Iteratively fit a curve to the given data using principal curves
#' }
#'
#' @inheritParams princurve::principal_curve
#' @param space A numeric matrix or a data frame containing the coordinates of samples.
#' @param k The number of clusters to cluster the data into.
#'
#' @return A list containing several objects:
#' \itemize{
#' \item \code{path}: the trajectory obtained by principal curves.
#' \item \code{time}: the time point of each sample along the inferred trajectory.
#' }
#'
#' @seealso \code{\link{reduce_dimensionality}}, \code{\link{draw_trajectory_plot}}
#'
#' @export
#'
#' @importFrom princurve principal_curve
#' @importFrom dynutils scale_minmax
#'
#' @examples
#' ## Generate an example dataset and visualise it
#' dataset <- generate_dataset(num_genes = 200, num_samples = 400, num_groups = 4)
#' space <- reduce_dimensionality(dataset$expression, ndim = 2)
#' draw_trajectory_plot(space, progression_group = dataset$sample_info$group_name)
#'
#' ## Infer a trajectory through this space
#' traj <- infer_trajectory(space)
#'
#' ## Visualise the trajectory
#' draw_trajectory_plot(space, path=traj$path, progression_group = dataset$sample_info$group_name)
infer_trajectory <- function(
space,
k = 4,
thresh = .001,
maxit = 10,
stretch = 0,
smoother = "smooth_spline",
approx_points = 100
) {
# input checks
check_numeric_matrix(space, "space", finite = TRUE)
init_traj <-
if (k <= 1) {
NULL
} else {
infer_initial_trajectory(space, k = k)
}
# iteratively improve this curve using principal_curve
fit <- princurve::principal_curve(
as.matrix(space),
start = init_traj,
thresh = thresh,
maxit = maxit,
stretch = stretch,
smoother = smoother,
approx_points = approx_points,
trace = FALSE,
plot_iterations = FALSE
)
# construct final trajectory
path <- fit$s[fit$ord, , drop = FALSE]
dimnames(path) <- list(NULL, paste0("Comp", seq_len(ncol(path))))
# construct timeline values
time <- dynutils::scale_minmax(fit$lambda)
# output result
list(
path = path,
time = time
) %>% dynutils::add_class(
"SCORPIUS::trajectory"
)
}
#' Reverse a trajectory
#'
#' Since the direction of the trajectory is not specified, the ordering of a trajectory may be inverted using \code{reverse_trajectory}.
#'
#' @param trajectory A trajectory as returned by \code{\link{infer_trajectory}}.
#'
#' @return The same trajectory, but in the other direction.
#'
#' @seealso \code{\link{infer_trajectory}}
#'
#' @importFrom methods is
#'
#' @export
#'
#' @examples
#' ## Generate an example dataset and infer a trajectory through it
#' dataset <- generate_dataset(num_genes = 200, num_samples = 400, num_groups = 4)
#' group_name <- dataset$sample_info$group_name
#' space <- reduce_dimensionality(dataset$expression, ndim = 2)
#' traj <- infer_trajectory(space)
#'
#' ## Visualise the trajectory
#' draw_trajectory_plot(space, group_name, path = traj$path)
#'
#' ## Reverse the trajectory
#' reverse_traj <- reverse_trajectory(traj)
#' draw_trajectory_plot(space, group_name, path = reverse_traj$path)
#'
#' plot(traj$time, reverse_traj$time, type = "l")
reverse_trajectory <- function(trajectory) {
if (!is(trajectory, "SCORPIUS::trajectory"))
stop(sQuote("trajectory"), " needs to be an object returned by infer_trajectory")
trajectory$time <- 1 - trajectory$time
trajectory$path <- trajectory$path[rev(seq_len(nrow(trajectory$path))), , drop = FALSE]
trajectory
}