-
Notifications
You must be signed in to change notification settings - Fork 1
/
data_visualization.R
194 lines (167 loc) · 5.87 KB
/
data_visualization.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
#' Simple PCA analysis
#'
#' Runs a principal component analysis on the transposed input matrix, so
#' that the matrix columns (samples) become the observations, and plots the
#' samples on the first two principal components. Points are coloured
#' according to a column of the sample information data frame.
#'
#' @param mat The input matrix containing normalized data or counts, with
#'   samples in columns.
#' @param sampInfo The sample information data frame (a tibble is accepted
#'   as well). Its first column is used as row names, so it must contain
#'   unique sample identifiers.
#' @param groupCol The column name of the grouping variable used to colour
#'   the samples.
#'
#' @return A ggplot2 object containing the resulting plot
#' @export
#'
#' @importFrom ggplot2 autoplot theme_bw
#' @importFrom stats prcomp
#' @importFrom dplyr %>%
#' @import ggfortify
#'
pcaPlot <- function(mat, sampInfo, groupCol) {
  # prcomp() treats rows as observations, hence the transpose.
  pcRes <- stats::prcomp(t(mat))

  # `[[1]]` always yields a plain vector. `[, 1]` returns a one-column tibble
  # when `sampInfo` is a tibble, which is not a valid row-name vector.
  sampInfo <- as.data.frame(sampInfo)
  rownames(sampInfo) <- sampInfo[[1]]

  # When every matrix column name is a known sample identifier, put the
  # sample table in the same order as the matrix columns (and drop samples
  # that are not in the matrix).
  # NOTE(review): assumes autoplot() pairs the rows of `data` with the PCA
  # scores by position -- confirm against ggfortify.
  sampleIds <- colnames(mat)
  if (!is.null(sampleIds) && all(sampleIds %in% rownames(sampInfo))) {
    sampInfo <- sampInfo[sampleIds, , drop = FALSE]
  }

  ggplot2::autoplot(object = pcRes, data = sampInfo, colour = groupCol) +
    ggplot2::theme_bw()
}
#' Simple Volcano Plot
#'
#' Creates a volcano plot (log fold change against -log10 adjusted P value),
#' faceted by comparison. Each feature is classified as "Up", "Down" or
#' "Other" from the two cutoffs, and every facet is annotated with the number
#' and percentage of features in each class.
#'
#' @param intTable Data frame with differential analysis results. It must
#'   contain the columns `feature`, `comparison`, `logFc` and `pAdj`.
#' @param logFcCutoff Fold change cutoff, applied symmetrically: features
#'   with `logFc >= logFcCutoff` can be "Up" and features with
#'   `logFc <= -logFcCutoff` can be "Down".
#' @param pCutoff Adjusted P value cutoff; only features with
#'   `pAdj <= pCutoff` are classified as "Up" or "Down".
#'
#' @return A ggplot2 object containing the resulting plot
#' @export
#'
#' @import dplyr
#' @import ggplot2
#'
volcanoPlot <- function(intTable, logFcCutoff = log2(2), pCutoff = 0.05) {
  # Fail early with a readable message instead of an error raised from
  # inside the dplyr / ggplot2 pipeline.
  requiredCols <- c("feature", "comparison", "logFc", "pAdj")
  missingCols <- setdiff(requiredCols, colnames(intTable))
  if (length(missingCols) > 0) {
    stop(
      "`intTable` is missing required column(s): ",
      paste(missingCols, collapse = ", "),
      call. = FALSE
    )
  }

  # Classify every feature; rows where a condition evaluates to NA fall
  # through to "Other". Facets keep the order in which comparisons appear.
  intTable <- intTable %>%
    dplyr::mutate(
      status = dplyr::case_when(
        logFc >= logFcCutoff & pAdj <= pCutoff ~ "Up",
        logFc <= -logFcCutoff & pAdj <= pCutoff ~ "Down",
        TRUE ~ "Other"
      )
    ) %>%
    dplyr::mutate(comparison = factor(comparison, levels = unique(comparison)))

  # One annotation row per comparison/status pair. The percentage is taken
  # over all features of the comparison, and the table is explicitly
  # ungrouped so no grouping leaks into the plotting layer.
  annotDf <- intTable %>%
    dplyr::ungroup() %>%
    dplyr::count(comparison, status, name = "n") %>%
    dplyr::group_by(comparison) %>%
    dplyr::mutate(pct = (n / sum(n)) * 100) %>%
    dplyr::ungroup() %>%
    dplyr::mutate(
      # "Up" labels go to the top-right corner, "Down" to the top-left and
      # "Other" to the top centre (x = 0) of each facet.
      xpos = dplyr::case_when(
        status == "Up" ~ Inf,
        status == "Down" ~ -Inf,
        TRUE ~ 0
      ),
      ypos = Inf,
      hjust = dplyr::case_when(
        status == "Up" ~ 1,
        status == "Down" ~ 0,
        TRUE ~ 0.5
      ),
      vjust = 1,
      label = paste0("N = ", n, "\n", round(pct, digits = 2), "%")
    )

  outP <- ggplot2::ggplot(
    intTable,
    ggplot2::aes(
      x = logFc, y = -log10(pAdj), label = feature, color = status
    )
  ) +
    ggplot2::geom_vline(
      xintercept = c(logFcCutoff, -logFcCutoff), lty = 2
    ) +
    ggplot2::geom_hline(yintercept = -log10(pCutoff), lty = 2) +
    ggplot2::geom_label(
      data = annotDf,
      ggplot2::aes(
        x = xpos, y = ypos, hjust = hjust, vjust = vjust, label = label
      )
    ) +
    ggplot2::geom_point(alpha = 0.3) +
    ggplot2::xlab("Log2 Fold Change") +
    ggplot2::scale_color_manual(
      values = c("Up" = "red", "Down" = "blue", "Other" = "black")
    ) +
    ggplot2::facet_wrap(facets = ggplot2::vars(comparison)) +
    ggplot2::theme_bw() +
    ggplot2::theme(
      strip.background = ggplot2::element_rect(
        colour = "black", fill = "white"
      ),
      strip.placement = "inside",
      panel.spacing = ggplot2::unit(0.2, "lines"),
      panel.background = ggplot2::element_rect(fill = "white"),
      panel.border = ggplot2::element_rect(colour = "black")
    )
  outP
}
#' Heatmap plot
#'
#' Creates a heatmap representation of the most variable features in a
#' matrix. Features (rows) are ranked by their standard deviation across
#' samples and the `nTop` most variable ones are plotted, with the samples
#' (columns) annotated by the grouping variable.
#'
#' @param mat The matrix to be plotted, with features in rows and samples in
#'   columns.
#' @param sampInfo The sample information data frame (a tibble is accepted
#'   as well). Its first column is used as row names, so it must contain
#'   unique sample identifiers.
#' @param groupCol The column name of the grouping variable
#' @param scaleBy character indicating if the values should be centered and
#'   scaled in either the row direction or the column direction, or none.
#'   Corresponding values are "row", "column" and "none"
#' @param nTop Number of features to be plotted after ranking them by the
#'   standard deviation. If fewer features can be ranked, all of them are
#'   plotted.
#' @param ... Arguments to be passed to \link[pheatmap]{pheatmap}
#'
#' @return The pheatmap plot
#' @export
#'
#' @import dplyr
#' @import pheatmap
#'
heatmapPlot <- function(mat, sampInfo, groupCol, scaleBy = "row",
                        nTop = 100, ...) {
  # `[[1]]` always yields a plain vector. `[, 1]` returns a one-column tibble
  # when `sampInfo` is a tibble, which is not a valid row-name vector.
  sampInfo <- as.data.frame(sampInfo)
  rownames(sampInfo) <- sampInfo[[1]]
  # Keep only the annotation column(s); `drop = FALSE` preserves the data
  # frame and its row names.
  annotCol <- sampInfo[, groupCol, drop = FALSE]

  # Rank rows by standard deviation, most variable first. `na.last = NA`
  # discards rows whose standard deviation is NA.
  featSd <- apply(mat, 1, stats::sd)
  ranked <- order(featSd, decreasing = TRUE, na.last = NA)

  # Never request more rows than can be ranked, and select by position so
  # the function also works when row names are missing or duplicated.
  nKeep <- max(0, min(nTop, length(ranked)))
  topIdx <- ranked[seq_len(nKeep)]
  # `drop = FALSE` keeps a one-row selection as a matrix.
  topMat <- mat[topIdx, , drop = FALSE]

  outHm <- pheatmap::pheatmap(
    topMat,
    scale = scaleBy,
    annotation_col = annotCol,
    ...
  )
  outHm
}
#' Violin plot
#'
#' Creates a per-column violin plot to explore data density before and after
#' normalization. The matrix is reshaped to long format (one row per
#' feature/sample value) and one violin is drawn per column.
#'
#' @param mat The matrix to be plotted; each column becomes one violin.
#'
#' @return A ggplot2 object
#' @export
#'
#' @import dplyr
#' @import ggplot2
#' @import tidyr
#'
violinPlot <- function(mat) {
  longDf <- mat %>%
    as.data.frame() %>%
    # rownames_to_column() comes from tibble, which is not among the
    # packages imported by this function's roxygen tags, so the call is
    # namespace-qualified to resolve regardless of what is attached.
    tibble::rownames_to_column(var = "feature") %>%
    tidyr::pivot_longer(-feature, names_to = "sample")

  ggplot2::ggplot(
    longDf,
    ggplot2::aes(x = sample, y = value, fill = sample)
  ) +
    ggplot2::geom_violin() +
    # Rotate the sample labels so they stay readable.
    ggplot2::guides(x = ggplot2::guide_axis(angle = 60)) +
    ggplot2::theme_bw() +
    ggplot2::theme(
      axis.title.x = ggplot2::element_blank(),
      legend.position = "none"
    )
}
#' GSEA plot
#'
#' Draws the results of a GSEA analysis as a grid of triangles: comparisons
#' on the x axis, pathways on the y axis. Fill encodes the NES, size encodes
#' -log10 of the adjusted P value, and the triangle points up for a positive
#' NES and down otherwise.
#'
#' @param gseaResDf The output data frame from \link[biokit]{gseaFromStats}.
#'   The columns `comparison`, `pathway`, `NES` and `padj` are used.
#' @param pCutoff Adjusted P value cutoff; only rows with `padj <= pCutoff`
#'   are plotted.
#'
#' @return A ggplot2 object
#' @export
#'
#' @import dplyr
#' @import ggplot2
#'
gseaPlot <- function(gseaResDf, pCutoff = 0.05) {
  plotTitle <- paste0("MSigDb Hallmarks Results (FDR <= ", pCutoff, ")")

  # Label the enrichment direction first, then keep the significant rows.
  withStatus <- dplyr::mutate(
    gseaResDf,
    status = ifelse(NES > 0, "Up", "Down")
  )
  sigRes <- subset(withStatus, padj <= pCutoff)

  dotPlot <- ggplot2::ggplot(
    data = sigRes,
    mapping = ggplot2::aes(
      x = comparison,
      y = pathway,
      fill = NES,
      size = -log10(padj),
      shape = status
    )
  ) +
    ggplot2::geom_point() +
    # Shapes 24 and 25 are the fillable up/down triangles.
    ggplot2::scale_shape_manual(values = c("Up" = 24, "Down" = 25)) +
    ggplot2::scale_fill_gradient2(
      high = "red", mid = "white", low = "blue", midpoint = 0
    ) +
    ggplot2::guides(x = ggplot2::guide_axis(angle = 60)) +
    ggplot2::theme_bw() +
    ggplot2::ggtitle(plotTitle) +
    ggplot2::theme(
      text = ggplot2::element_text(color = "black"),
      axis.text = ggplot2::element_text(color = "black"),
      plot.title = ggplot2::element_text(hjust = 0.5)
    )
  return(dotPlot)
}