-
Notifications
You must be signed in to change notification settings - Fork 0
/
GO_analysis.Rmd
217 lines (165 loc) · 7.98 KB
/
GO_analysis.Rmd
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
---
title: "GO analysis"
author: "ERM"
date: "`r Sys.Date()`"
output: html_document
---
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
```
# GO Analysis
I have created several files from the RNA analysis that contain the significant genes(determined by adj.P.val \< 0.1) from each Time and Condition. The names of the files are in the following format: 'sigV'+Drug(2 letters)+time.
example: 'sigVDA3.txt' means this file contains the significant DE genes from the Daunorubicin 3 hour compared to Vehicle Control 3 hour analysis
```{r Import libraries, message=FALSE, warning=FALSE}
library(gprofiler2)
library(tidyverse)
library(readr)
library(BiocGenerics)
library(gridExtra)
library(VennDiagram)
```
```{r Import data, echo=FALSE, message=FALSE, warning=FALSE}
##note, in this chunk the .rmd file is in the cm/analysis folder. need to back out with ..
##this code checks the directory and searches for the pattern, returning the full name of the file
file.names <- list.files(path = "data/", pattern = "sig*", ignore.case = TRUE,full.names = TRUE)
##Next I use the list of names and lapply to read all files into a list
##made the csv file to reimport
#write.csv(filenameonly,"data/filenameonly.txt", col.names = FALSE,row.names = FALSE)
##readin the filenameonly.txt file from data to use for naming and filtering.
filenameonly <- read_csv("data/filenameonly.txt")
#loop through the list of files and make a separate dataframe for each file under the 'real' name of the data set
for (k in 1:length(file.names)){
assign(paste0(filenameonly$x[k]) , read.csv(file.names[k]))
}
##rename the columns to the previous names
colnames(sigVDA24)<- c("ENTREZID","SYMBOL","logFC","AveExpr","t","P.Value","adj.P.Val","B")
colnames(sigVDX24)<- c("ENTREZID","SYMBOL","logFC","AveExpr","t","P.Value","adj.P.Val","B")
colnames(sigVEP24)<- c("ENTREZID","SYMBOL","logFC","AveExpr","t","P.Value","adj.P.Val","B")
colnames(sigVMT24)<- c("ENTREZID","SYMBOL","logFC","AveExpr","t","P.Value","adj.P.Val","B")
colnames(sigVTR24)<- c("ENTREZID","SYMBOL","logFC","AveExpr","t","P.Value","adj.P.Val","B")
colnames(sigVDA3)<- c("ENTREZID","SYMBOL","logFC","AveExpr","t","P.Value","adj.P.Val","B")
colnames(sigVDX3)<- c("ENTREZID","SYMBOL","logFC","AveExpr","t","P.Value","adj.P.Val","B")
colnames(sigVEP3)<- c("ENTREZID","SYMBOL","logFC","AveExpr","t","P.Value","adj.P.Val","B")
colnames(sigVMT3)<- c("ENTREZID","SYMBOL","logFC","AveExpr","t","P.Value","adj.P.Val","B")
colnames(sigVTR3)<- c("ENTREZID","SYMBOL","logFC","AveExpr","t","P.Value","adj.P.Val","B")
```
The analysis is based on all genes that passed the rowMeans\>0 from the previous page [link](https://reneeisnowhere.github.io/Cardiotoxicity/RNAseqrun_1_analysis.html)
```{r uploading the background genes, echo=FALSE, message=FALSE, warning=FALSE, paged.print=FALSE}
#backGL <- efit2$genes ***making the list
#write.csv(backGL, "data/backGL.txt", row.names = FALSE)
##read the .csv the render into txt in the data file!
backGL <- read_csv("data/backGL.txt",
col_types = cols(...1 = col_skip()))
```
Below is the analysis of differentially expressed genes for each treatment at 3 hours and 24 hours.
```{r intial analysis, eval=TRUE, echo=FALSE, fig.height=19, fig.width=10, message=FALSE, warning=FALSE}
gostres <- gost(query = sigVDA3$SYMBOL, organism = "hsapiens",
ordered_query = TRUE,
domain_scope = "custom",
measure_underrepresentation = FALSE,
evcodes = FALSE,
user_threshold = 0.0001,
correction_method = c("bonferroni"),
custom_bg = backGL$ENTREZID,
sources=c("GO:BP","GO:MF", "GO:CC"))
p <- gostplot(gostres, capped = FALSE, interactive = TRUE)
p
publish_gosttable(
gostres,
highlight_terms = NULL,
use_colors = TRUE,
show_columns = c("source", "term_name", "term_size", "intersection_size"),
filename = "../output/test.pdf",
ggplot = TRUE
)
```
I first looked at the data with all genes from the sigDA3 dataset. I used the list of all genes based on my rowMeans\>0 filtering as background.
### Analysis of Up versus Down
I then separated the VDA3 file by log2 Fold Change to see how the gene sets are enriched. Nothing showed up in the GO-BP/CC/MG-down regulated gene-set at a significant level, p<0.05.
```{r only the Da3 data3, echo=FALSE, message=FALSE, warning=FALSE}
#results_sig = subset(sigVDA3,adj.P.Val < 0.05)
# get the significant up-regulated genes
up = subset(sigVDA3, logFC > 0)
# get the significant down-regulated genes
down = subset(sigVDA3, logFC < 0)
gp_up = gost(query = up$ENTREZID, organism = "hsapiens",
ordered_query = TRUE,
domain_scope = "custom",
measure_underrepresentation = FALSE,
evcodes = FALSE,
user_threshold = 0.01,
correction_method = c("bonferroni"),
custom_bg = backGL$ENTREZID,
sources=c("GO:BP","GO:MF", "GO:CC"))
#gp_down = gost(query = down$ENTREZID, organism = "hsapiens",
# ordered_query = TRUE,
# domain_scope = "custom",
# measure_underrepresentation = FALSE,
# evcodes = FALSE,
# user_threshold = 0.05,
# correction_method = c("bonferroni"),
# custom_bg = backGL$ENTREZID,
# sources=c("GO:BP","GO:MF", "GO:CC"))
p2_up <- gostplot(gp_up, capped = FALSE, interactive = TRUE)
p2_up #+ ggtitle("Daunorubicin up regulated gene enrichment at 3 hours")
```
```{r showing it all, echo=FALSE, message=FALSE, warning=FALSE}
#p2_down <- gostplot(gp_down, capped = FALSE, interactive = TRUE)
#p2_down #+ ggtitle("Daunorubicin down regulated gene enrichment at 3 hours")
```
#### I next wanted to see what happened at 24 hours with daunorubicin. I used the sigVDA24 file to do this.
unfortunately the enrichment below 0.0001
```{r DA24, eval=TRUE, echo=FALSE, fig.height=10, fig.width=10, message=FALSE, warning=FALSE}
gostresDA24 <- gost(query = sigVDA24$SYMBOL, organism = "hsapiens",
ordered_query = TRUE,
domain_scope = "custom",
measure_underrepresentation = FALSE,
evcodes = FALSE,
user_threshold = 0.001,
correction_method = c("bonferroni"),
custom_bg = backGL$ENTREZID,
sources=c("GO:BP","GO:MF", "GO:CC"))
pDA24 <- gostplot(gostres, capped = FALSE, interactive = TRUE)
pDA24
publish_gosttable(gostresDA24,
highlight_terms = NULL,
use_colors = TRUE,
show_columns = c("source", "term_name", "term_size", "intersection_size"),
filename = "../output/test.pdf",
ggplot = TRUE
)
```
# Graphing specific gene expression
First get a list of genes you want to see. There are multiple was to "see" these. I used the word 'apple' to store my list
```{r genelist, eval=FALSE, message=FALSE, warning=FALSE, include=FALSE}
apple <- c('CDKN1A',"BBC3", "MDM2", "BCL2", "BAX", "GPX1", 'MFN2')#,"HAS3",'CYBA','TOP2B', 'TP53', 'ABCC1', 'ABCC5', 'RRAGD', 'DUSP13', 'NDUFAF1', 'TDP2', 'TXNIP','BRCA1', 'CTCF','RAD21','RYR2')
##find the index number for each gene
indices <- match(apple, x$genes$SYMBOL)
###subset the matrix
entreset <- x$genes$ENTREZID[indices]
gnames <- cbind(entreset,apple)
colnames(gnames) <- c("ENTREZID","SYMBOL" )
## Now to investigate specific gene expression and analyse similar vs difference of genes up and down between drug "classes" (anthracycline vs non-anthracycline).
for (gn in indices){
print(Da24counts %>% filter(Tags == gn) %>% ggplot(aes(x=as.factor(Samples), y=Freq))+
geom_boxplot(aes(fill = as.factor(Samples)))+
theme_cowplot(font_size =12,)+
ggtitle(paste0(gnames$gnames," expression in Daunorubicin at 24 hours"))+
labs(y = "Log2(cpm)",x= "", fill = "Treatment"))
}
```
```{r function for plotting, eval=FALSE, message=FALSE, warning=FALSE, include=FALSE}
plot_function <- function(index, df) {
for (i in index) {
data <- df %>% filter(ENTREZID == i)
if (!nrow(data)) next
print(data %>% ggplot(aes(x=as.factor(Samples), y=Counts))+
geom_boxplot(aes(fill = as.factor(Samples)))+
theme_cowplot(font_size =12,)+
ggtitle(paste0(i," expression "))+
labs(y = "Log2(cpm)",x= "", fill = "Treatment"))
}
}
apple <- as.data.frame(sigVDA24$ENTREZID)
plot_function(apple,Dx3counts)+theme(axis.text.x = element_text(size= 8,angle = 90))+scale_x_discrete(labels=ENTREZID)
```