/
cell_types.R
188 lines (154 loc) · 7.38 KB
/
cell_types.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
#' Update cell type assignment for each metacell
#'
#' Replace the cell type assigned to each metacell with the assignments listed in \code{metacell_types_file}.
#'
#' This is usually done after a first iteration of annotation using the "Annotate" tab in the MCView annotation,
#' which can export a valid \code{metacell_types_file}.
#' The file should have a column named "metacell" with the metacell ids and another
#' column named "cell_type" or "cluster" with the cell type assignment.
#'
#' Note that the exported file from the __MCView__ app contains additional fields
#' which will be ignored in this function.
#'
#' Under the hood - MCView updates a file named "metacell_types.tsv" under \code{project/cache/dataset},
#' which can also be edited manually.
#'
#' If the file contains an additional 'color' field, the cell type colors are updated as well
#' (via \code{update_cell_type_colors}).
#'
#' @param project path to the project directory
#' @param dataset name for the dataset, e.g. "PBMC"
#' @param metacell_types_file path to a tabular file (csv,tsv) with cell type assignment for
#' each metacell. The file should have a column named "metacell" with the metacell ids and another
#' column named "cell_type" or "cluster" with the cell type assignment. Metacell ids that do
#' not exist in the data would be ignored.
#'
#' @return Called for its side effects (rewriting the cached metacell types); the return value is not meaningful.
#'
#' @examples
#' \dontrun{
#' update_metacell_types("PBMC", "PBMC163k", "raw/metacell-clusters.csv")
#' }
#'
#' @export
update_metacell_types <- function(project, dataset, metacell_types_file) {
    verify_project_dir(project)
    verify_app_cache(project)

    # Currently cached assignments. The metacell id is coerced to character so that it
    # joins cleanly with the parsed file, whose ids are also converted to character.
    current_types <- load_shiny_data("metacell_types", dataset, project_cache_dir(project)) %>%
        mutate(metacell = as.character(metacell))

    # The raw table is kept because it is inspected for a 'color' column further down.
    raw_df <- fread(metacell_types_file)
    new_assignments <- parse_metacell_types(raw_df) %>%
        select(metacell, cell_type)

    # Swap the old cell_type column for the new one. The join is driven by the cached
    # table, so ids that appear only in the file are dropped.
    metacell_types <- current_types %>%
        select(-cell_type) %>%
        left_join(new_assignments, by = "metacell")

    serialize_shiny_data(
        metacell_types,
        "metacell_types",
        dataset = dataset,
        cache_dir = project_cache_dir(project),
        flat = TRUE
    )
    cli_alert_success("Succesfully changed metacell cell type assignments")

    if (has_name(raw_df, "color")) {
        cli_alert_info("File has a field named 'color', updating also cell type colors.")
        update_cell_type_colors(project, dataset, raw_df)
    }
}
#' Update color assignment for each cell type
#'
#' Change the color assignments for each cell type to the ones listed at \code{cell_type_colors_file}.
#'
#' This is usually done after a first iteration of annotation using the "Annotate" tab in the MCView annotation, which can
#' export a valid \code{cell_type_colors_file}.
#'
#' The file should have a column named "cell_type" or "cluster" with the cell types and another column named "color"
#' with the color assignment.
#' Note that the exported file from the __MCView__ app contains additional fields which will be
#' ignored in this function.
#'
#' Under the hood - MCView updates a file named "cell_type_colors.tsv" under \code{project/cache/dataset},
#' which can also be edited manually.
#'
#' @param project path to the project directory
#' @param dataset name for the dataset, e.g. "PBMC"
#' @param cell_type_colors_file path to a tabular file (csv,tsv) with color assignment for
#' each cell type, or an already-loaded table with the same columns (this is how
#' \code{update_metacell_types} forwards a file that has a 'color' column).
#' The table should have a column named "cell_type" or "cluster" with the
#' cell types and another column named "color" with the color assignment. Cell types that do not
#' exist in the metacell types would be ignored, so if you changed the names of cell types you would have to also
#' update the metacell types (using \code{update_metacell_types}).
#' NOTE(review): this function itself writes every parsed cell type; presumably the unused ones are
#' dropped when the app loads the data - confirm.
#' The function also accepts output of the 'export' button from the application annotation page.
#' This argument has no default and is required here.
#'
#' @return Called for its side effects (rewriting the cached cell type colors); the return value is not meaningful.
#'
#' @examples
#' \dontrun{
#' update_cell_type_colors("PBMC", "PBMC163k", "raw/cluster-colors.csv")
#' }
#'
#' @export
update_cell_type_colors <- function(project, dataset, cell_type_colors_file) {
    verify_project_dir(project)
    verify_app_cache(project)

    # Parse and validate before serializing, so a malformed table aborts
    # before anything is written to the cache.
    cell_type_colors <- parse_cell_type_colors(cell_type_colors_file)

    serialize_shiny_data(
        cell_type_colors,
        "cell_type_colors",
        dataset = dataset,
        cache_dir = project_cache_dir(project),
        flat = TRUE
    )
    cli_alert_success("Succesfully changed cell type color assignments")
}
#' Parse and validate a cell type color table
#'
#' Accepts either a path to a tabular file (read with \code{fread}) or an already-loaded table.
#' A "cluster" column is renamed to "cell_type"; rows with a missing cell type or color and rows
#' whose cell type is "(Missing)" are dropped; only the first row of each cell type is kept.
#' If there is no "order" column, one is created from the row order.
#'
#' @param cell_type_colors path to a tabular file, or a data frame, with a "cell_type" (or "cluster")
#' column and a "color" column; an "order" column is optional.
#'
#' @return a table with the columns \code{cell_type}, \code{color} and \code{order}, one row per cell type.
#' Aborts if the required columns are missing.
#'
#' @noRd
parse_cell_type_colors <- function(cell_type_colors) {
    if (is.character(cell_type_colors)) {
        file <- cell_type_colors
        cell_type_colors <- fread(cell_type_colors) %>% as_tibble()
    } else {
        # No path is available for an in-memory table; use a generic label in error messages.
        file <- "cell_type_colors"
    }

    if (!has_name(cell_type_colors, "cell_type") && !has_name(cell_type_colors, "cluster")) {
        cli_abort("{.field {file}} should have a column named {.field cell_type} or {.field cluster}")
    }

    if (!has_name(cell_type_colors, "color")) {
        cli_abort("{.field {file}} should have a column named {.field color}")
    }

    if (rlang::has_name(cell_type_colors, "cluster")) {
        cell_type_colors <- cell_type_colors %>% rename(cell_type = cluster)
    }

    # Drop unusable rows and keep the first row of every cell type. The input may be a
    # per-metacell table in which each cell type is repeated many times.
    cell_type_colors <- cell_type_colors %>%
        filter(!is.na(cell_type), !is.na(color)) %>%
        filter(cell_type != "(Missing)") %>%
        distinct(cell_type, .keep_all = TRUE)

    if (!has_name(cell_type_colors, "order")) {
        # seq_len() instead of 1:n(): on a zero-row table 1:0 evaluates to c(1, 0),
        # a length-2 vector that mutate() cannot recycle into zero rows.
        cell_type_colors <- cell_type_colors %>% mutate(order = seq_len(n()))
    }

    cell_type_colors <- cell_type_colors %>%
        distinct(cell_type, color, order) %>%
        select(cell_type, color, order)

    # NOTE(review): the de-duplication by cell_type above already leaves one row per
    # cell type, so this guard cannot currently trigger. It is kept as a safety net;
    # decide whether conflicting colors should abort (check before de-duplicating) or
    # keep resolving to the first occurrence.
    n_colors <- cell_type_colors %>%
        count(cell_type) %>%
        pull(n)
    if (any(n_colors > 1)) {
        cli_abort("Some cell types appear more than once with different colors.")
    }

    return(cell_type_colors)
}
#' Parse and validate a metacell type table
#'
#' Accepts either a path to a tabular file (read with \code{fread}) or an already-loaded table.
#' A "cluster" column is renamed to "cell_type", an "age" column is renamed to "mc_age", all
#' columns other than metacell / cell_type / mc_age are dropped and the metacell id is converted
#' to character.
#'
#' @param metacell_types path to a tabular file, or a data frame, with a "metacell" column and a
#' "cell_type" (or "cluster") column.
#' @param metacells optional vector of the metacell ids present in the data. When given, the function
#' aborts if the table contains ids that are not in \code{metacells}, and warns about ids in
#' \code{metacells} that are absent from the table. When \code{NULL} no such check is done.
#'
#' @return the cleaned table with a character \code{metacell} column, \code{cell_type}, and
#' \code{mc_age} when an age column was supplied.
#'
#' @noRd
parse_metacell_types <- function(metacell_types, metacells = NULL) {
    # `file` is the label interpolated into the error messages below. It has to be
    # defined here: otherwise `{file}` resolves to the base function `file()` and the
    # validation message itself fails to render.
    if (is.character(metacell_types)) {
        file <- metacell_types
        metacell_types <- fread(metacell_types) %>% as_tibble()
    } else {
        file <- "metacell_types"
    }

    if (!has_name(metacell_types, "metacell")) {
        cli_abort("{.field {file}} should have a column named {.field metacell}")
    }

    if (!has_name(metacell_types, "cell_type") && !has_name(metacell_types, "cluster")) {
        cli_abort("{.field {file}} should have a column named {.field cell_type} or {.field cluster}")
    }

    if (rlang::has_name(metacell_types, "cluster")) {
        metacell_types <- metacell_types %>% rename(cell_type = cluster)
    }

    # Keep only the columns that are used downstream; any_of() tolerates the optional age columns.
    metacell_types <- metacell_types %>%
        select(any_of(c("metacell", "cell_type", "age", "mc_age")))

    if ("age" %in% colnames(metacell_types)) {
        metacell_types <- metacell_types %>%
            rename(mc_age = age)
    }

    # Metacell ids are compared and joined as character everywhere else.
    metacell_types <- metacell_types %>%
        mutate(metacell = as.character(metacell))

    if (!is.null(metacells)) {
        # Ids that the table mentions but the data does not contain are an error.
        unknown_metacells <- metacell_types$metacell[!(metacell_types$metacell %in% metacells)]
        if (length(unknown_metacells) > 0) {
            mcs <- paste(unknown_metacells, collapse = ", ")
            cli_abort("Metacell types contains metacells that are missing from the data: {.field {mcs}}")
        }

        # Ids that the data contains but the table omits only produce a warning.
        missing_metacells <- metacells[!(metacells %in% metacell_types$metacell)]
        if (length(missing_metacells) > 0) {
            mcs <- paste(missing_metacells, collapse = ", ")
            cli_warn("Some metacells are missing from metacell types: {.field {mcs}}")
        }
    }

    return(metacell_types)
}