-
Notifications
You must be signed in to change notification settings - Fork 0
/
visualization.R
159 lines (143 loc) · 6.55 KB
/
visualization.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
#' Plot label rate histogram
#'
#' Plots the label rate histogram together with the growth rate as a reference point. Supports plotting of multiple datasets by coloring according to growth rate.
#'
#' @param data data with label rate calculated, including growth rate
#' @export
tor_plot_label_rate_hist <- function(data) {
# safety checks for data
if (missing(data)) stop("need to supply a data set", call. =FALSE)
if (!is.data.frame(data)) {
glue("wrong data type supplied: {class(data)[1]}") %>% stop(call. = FALSE)
}
# safety checks for specific variables
columns <- c("growth_rate", "label_rate")
missing <- setdiff(columns, names(data))
if (length(missing) > 0) {
glue("column '{collapse(missing, sep = ', ', last = ' and ')}' does not exist in the dataset") %>% stop(call. = FALSE)
}
data %>%
filter(!is.na(label_rate)) %>%
ggplot() +
aes(x = label_rate, fill = factor(signif(growth_rate, 2))) +
geom_histogram(position = "dodge", bins = 80) +
geom_vline(data = function(x) unique(x %>% select(growth_rate)),
mapping = aes(xintercept = growth_rate)) +
scale_y_continuous(expand = c(0, 0)) +
labs(x = "label rate", fill = "growth rate") +
theme_bw()
}
#' Plot label rate error
#'
#' Visualize the label rate and residual standard error. Displays the residual standard error (i.e. the normalized error of the fit) for each calculated label rate to easily evaluate what quality cutoffs might be useful. Plots the growth rate as a reference and the individual standard errors of the parameters from the fit. Supports plotting of multiple datasets by coloring according to growth rate.
#'
#' @param data data with label rate calculated, including growth rate standard errors
#' @export
tor_plot_label_rate_error <- function(data) {
# safety checks for data
if (missing(data)) stop("need to supply a data set", call. =FALSE)
if (!is.data.frame(data)) {
glue("wrong data type supplied: {class(data)[1]}") %>% stop(call. = FALSE)
}
# safety checks for specific variables
columns <- c("prot_id", "growth_rate", "label_rate", "fit_rse", "label_rate_se")
missing <- setdiff(columns, names(data))
if (length(missing) > 0) {
glue("column '{collapse(missing, sep = ', ', last = ' and ')}' does not exist in the dataset") %>% stop(call. = FALSE)
}
data %>%
filter(!is.na(label_rate)) %>%
mutate(`growth rate` = factor(signif(growth_rate, 2))) %>%
ggplot() +
aes(text = prot_id, x = label_rate, y = fit_rse,
color = `growth rate`) +
geom_errorbarh(map = aes(xmin = label_rate - label_rate_se, xmax = label_rate + label_rate_se)) +
geom_vline(data = function(x) unique(x %>% select(growth_rate)),
mapping = aes(xintercept = growth_rate)) +
geom_point() +
scale_y_continuous(labels = function(x) str_c(100*x)) +
expand_limits(y = 0) +
#scale_color_gradient2(low = "blue", high = "red", mid = "green") +
#theme(legend.position = "none") +
labs(y = "residual standard error of fit [% labelled]", x = "label rate [1/time]") +
theme_bw()
}
#' Plot labeling curves
#'
#' Visualizes the labeling fits for peptides or proteins. Selects a random subset from the provided data and plots a maximum of \code{plot_number} of plots.
#'
#' @param data data with label rate (and error), growth rate (and error)
#' @param grouping by protein or peptide for visualization ??
#' @param time_col the name of the column that holds the time, assumes "hours" by default.
#' @param plot_number designate the maximum number of curves you want to plot. Use \code{plot_number = NULL} to plot all - be careful with this option, the plot might take a long time to render with a larger number of proteins or peptides.
#' @param random_seed specify this to get reproducible selection of "random" plots
#' @export
tor_plot_labeling_curves <- function(data, grouping = protein, time_col = "hours", plot_number = 6, random_seed = random()) {
# safety checks for data
if (missing(data)) stop("need to supply a data set", call. =FALSE)
if (!is.data.frame(data)) {
glue("wrong data type supplied: {class(data)[1]}") %>% stop(call. = FALSE)
}
# safety checks for specific variables
columns <- c("prot_id", "nested_data", "label_rate", "growth_rate", "fit")
missing <- setdiff(columns, names(data))
if (length(missing) > 0) {
glue("column '{collapse(missing, sep = ', ', last = ' and ')}' does not exist in the dataset") %>% stop(call. = FALSE)
}
# safety checks for specific variables in nested data
unnested_checks <- data$nested_data[[1]]
columns <- c(time_col, "frac_lab")
missing <- setdiff(columns, names(unnested_checks))
if (length(missing) > 0) {
glue("columns '{collapse(missing, sep = ', ', last = ' and ')}' do not exist in the dataset") %>% stop(call. = FALSE)
}
# check grouping mode
by_peptide <- "peptide_seq" %in% names(data)
# random seed
random <- function() sample(.Random.seed, 1)
set.seed(random_seed)
# generate plot data df
plot_data <- data %>%
{
if (is.null(plot_number))
.
else
sample_n(., size = min(nrow(data), plot_number))
} %>%
# introduce a unique row number to group lines together easily (no matter if it's proteins or peptides)
mutate(`#` = row_number()) %>%
# calculate curves
mutate(curves =
pmap(list(
data = nested_data,
label_rate = label_rate,
growth_rate = growth_rate
),
function(data, label_rate, growth_rate) {
data_frame(
!!time_col := seq(0, max(data[[time_col]]), length.out = 20),
fit = 1 - exp(-label_rate * hours),
expected = 1 - exp(-growth_rate * hours)
) %>%
gather(curve, frac_lab, !!quo(-!!sym(time_col)))
})
)
p <- plot_data %>%
ggplot() +
aes_q(x = sym(time_col), y = sym("frac_lab"), group = sym("#")) +
suppressWarnings(
# warnings need to be surpressed because of the text aesthetic for plotly
geom_point(data = function(x) unnest(x, nested_data) %>% filter(!is.na(frac_lab)),
mapping = aes(text = peptide_seq))
) +
geom_line(data = function(x) unnest(x, curves) %>% filter(!is.na(frac_lab)),
mapping = aes(color = NULL, group = curve, linetype = curve)) +
theme_bw() +
scale_y_continuous("fraction labeled", labels = scales::percent)
# coloring and facetting
if (by_peptide)
p <- p + aes(color = peptide_seq) + facet_wrap(~peptide_seq)
else
p <- p + aes(color = prot_id) + facet_wrap(~prot_id)
return(p)
}