-
Notifications
You must be signed in to change notification settings - Fork 4
/
flywire-nuclei.R
185 lines (174 loc) · 7.61 KB
/
flywire-nuclei.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
#' Queries for information about flywire nuclei (helpful for finding somata)
#'
#' @description \code{flywire_nuclei} finds nuclei based on known \code{rootids}
#' or \code{nucleus_ids}
#' @inheritParams flywire_partners
#' @param nucleus_ids ids from the nucleus table to return (optional, NB only
#' one of \code{rootids} and \code{nucleus_ids} can be provided).
#' @param rawcoords Whether to return coordinates in raw form rather than nm
#' (default \code{FALSE})
#' @param ... Additional arguments passed to \code{\link{flywire_cave_query}}
#'
#' @return A data.frame containing information about nuclei including \itemize{
#'
#' \item id nucleus id
#'
#' \item pt_position the XYZ position of the centre of the nucleus. This will
#' always be in nm when \code{rawcoords=FALSE} even if the remote table stores
#' raw (uncalibrated) voxel coordinates. It will be a comma separated string
#' when rawcoords=TRUE since this is most convenient for pasting between
#' applications.
#'
#' \item pt_supervoxel_id corresponding to the nucleus may be missing if the
#' segmentation is disrupted at the location of the nucleus e.g. because of
#' masking issues, missing sections etc.
#'
#' \item pt_root_id The current root id (when the \code{pt_position} maps onto
#' the segmentation.)
#'
#' \item volume the volume in cubic microns of the nucleus
#'
#' }
#' @export
#' @importFrom dplyr right_join across ends_with
#' @importFrom nat xyzmatrix2str
#' @examples
#' \donttest{
#' # an example where there are two nucleus matches
#' flywire_nuclei(flywire_xyz2id(c(120152, 22864, 3564), rawcoords = TRUE))
#' }
flywire_nuclei <- function(rootids=NULL, nucleus_ids=NULL, rawcoords=FALSE, ...) {
if(!is.null(rootids) & !is.null(nucleus_ids))
stop("You must supply only one of rootids or nucleus_ids!")
res <- if(is.null(rootids) && is.null(nucleus_ids))
flywire_cave_query(table = nucleus_table_name(), ...)
else if(!is.null(rootids)) {
rootids=flywire_ids(rootids)
nuclei <- if(length(rootids)<200) {
rid=paste(rootids, collapse=',')
ridq=reticulate::py_eval(sprintf('{"pt_root_id": [%s]}', rid), convert = F)
flywire_cave_query(table = nucleus_table_name(), filter_in_dict=ridq, ...)
} else {
flywire_cave_query(table = nucleus_table_name(), live = F, ...)
}
# bail if root id is not in table
if(nrow(nuclei)==0) return(nuclei)
nuclei <- nuclei %>%
right_join(data.frame(pt_root_id=as.integer64(rootids)), by="pt_root_id") %>%
select(colnames(nuclei))
if (length(rootids) < 200) {
nuclei
} else {
nuclei %>%
mutate(pt_root_id = flywire_updateids(.data$pt_root_id,
svids = .data$pt_supervoxel_id))
}
} else {
nid=paste(nucleus_ids, collapse=',')
nidq=reticulate::py_eval(sprintf('{"id": [%s]}', nid), convert = F)
nuclei=flywire_cave_query(table = nucleus_table_name(), filter_in_dict=nidq, ...)
nuclei %>%
right_join(data.frame(id=nucleus_ids), by="id") %>%
select(colnames(nuclei))
}
res <- standard_nuclei(res)
if(isFALSE(rawcoords)) res else {
res %>%
mutate(across(ends_with("position"),
function(x) xyzmatrix2str(flywire_nm2raw(x))))
}
}
#' @description \code{flywire_nearest_nuclei} returns the nearest nucleus to a
#' query xyz location. When \code{rawcoords=T} both the input and output
#' positions are in raw voxels. Note however that distances are still
#' calculated in nm. \code{xyz} may contain single points unless \code{k>1},
#' in which case only one query point is allowed.
#'
#' @details \code{flywire_nearest_nuclei} caches the nucleus table and then
#' updates ids of any selected values. This saves time for subsequent queries
#' assuming that you are returning less than half of the total rows.
#'
#' @param xyz One or more (if \code{k=1}) query points. In raw coordinates when
#' \code{rawcoords=T}
#' @param k The number of nearest nuclei to return for each query position. When
#' \code{k>1} you are currently limited to one query point.
#'
#' @return For \code{flywire_nearest_nuclei} when \code{rawcoords=T} both the
#' input and output positions are in raw voxels. Note however that distances
#' are still calculated in nm.
#' @export
#' @rdname flywire_nuclei
#' @examples
#' \donttest{
#' nn=flywire_nearest_nuclei(c(480608, 91456, 142560), k=2)
#' as.data.frame(nn)
#'
#' flywire_nearest_nuclei('163113, 59074, 5295', rawcoords = TRUE)
#' }
#'
#' \dontrun{
#' # from clipboard e.g. copied from flywire
#' flywire_nearest_nuclei(clipr::read_clip(), rawcoords = TRUE)
#' }
flywire_nearest_nuclei <- function(xyz, rawcoords=F, k=1) {
xyz=xyzmatrix(xyz)
if(rawcoords) xyz=flywire_raw2nm(xyz)
if(k>1 && nrow(xyz)>1) stop("If k>1 you can only give one point")
nuclei <- cached_nuclei()
nnres=nabor::knn(xyzmatrix(nuclei$pt_position), xyz, k=k)
df=nuclei[c(nnres$nn.idx),,drop=F]
df$dist=c(nnres$nn.dists)
df$pt_root_id=flywire_updateids(df$pt_root_id, svids = df$pt_supervoxel_id)
if(isFALSE(rawcoords)) df else{
df %>%
mutate(across(ends_with("position"),
function(x) xyzmatrix2str(flywire_nm2raw(x))))
}
}
# private function to ensure that we always have coords in nm
#' @importFrom nat xyzmatrix2list
standard_nuclei <- function(df, datastack_name=getOption("fafbseg.cave.datastack_name", "flywire_fafb_production")) {
vd=nucleus_table_info(datastack_name = datastack_name)[['voxel_resolution']]
isnm=is.null(vd) || isTRUE(all.equal(vd,c(1,1,1)))
if(isnm) return(df)
# otherwise we need to scale up by the voxel dimensions in the table
df %>%
mutate(across(ends_with("position"),
function(x) xyzmatrix2list(flywire_raw2nm(x, vd=vd))))
}
# private function to cache nucleus table for given dataset
cached_nuclei <- memoise::memoise(function(datastack_name=getOption("fafbseg.cave.datastack_name", "flywire_fafb_production")) {
table=nucleus_table_name(datastack_name)
df=flywire_cave_query(table = table, live = T)
standard_nuclei(df)
}, ~memoise::timeout(3600))
# try to find the nucleus table in a consistent way so that we can use
# these functions for FANC as well
# see https://fanc-reconstruction.slack.com/archives/C0133P1GRV0/p1642233199005400
# and https://flywire-forum.slack.com/archives/C01M4LP2Y2D/p1642237506018900
# for some relevant discussion
nucleus_table_name <- memoise::memoise(function(datastack_name=getOption("fafbseg.cave.datastack_name", "flywire_fafb_production")) {
fac=flywire_cave_client(datastack_name=datastack_name)
dsinfo=fac$info$get_datastack_info()
if(!is.null(dsinfo$soma_table))
return(dsinfo$soma_table)
# do we have a table nuclei_v1 which we will hard code as preferred for now?
tt=fac$annotation$get_tables()
nucleus_tables <- grep("^nuclei_", tt)
if(length(nucleus_tables)==0)
stop("I cannot find a nucleus table for datastack: ", datastack_name,
"\nPlease ask for help on #annotation_infrastructure https://flywire-forum.slack.com/archives/C01M4LP2Y2D")
if(length(nucleus_tables)==1)
return(tt[nucleus_tables])
chosen=tt[rev(nucleus_tables)[1]]
warning("Multiple candidate nucleus tables. Choosing: ", chosen)
return(chosen)
}, ~memoise::timeout(60^2))
# nb we include datastack_name as an arg so it is part of the memoisation hash
# in case we ever have two datasets with a table with the same name
nucleus_table_info <- memoise::memoise(function(datastack_name=getOption("fafbseg.cave.datastack_name", "flywire_fafb_production")) {
fac=flywire_cave_client(datastack_name=datastack_name)
table=nucleus_table_name(datastack_name = datastack_name)
info=fac$annotation$get_table_metadata(table)
info
})