-
Notifications
You must be signed in to change notification settings - Fork 0
/
04.3_Heatmap_COGs
228 lines (173 loc) · 8.59 KB
/
04.3_Heatmap_COGs
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# Pathway analysis
# Go to 5.4 : https://mkempenaar.github.io/gene_expression_analysis/chapter-5.html
library("seqinr") # for making fasta files
library("DESeq2") # for handeling the DESeq object
library("readxl") # for reading eggnog output excel files
library("pheatmap") # for generating heatmap
library(scales) # for making a good color gradient for heatmaps
library("reshape2") # for formatting the tables that is input to heatmap function
library("ggplot2")
#-----------Load-data---------------------------------------------
setwd("C:/Users/hr/Documents/AlgaeProject_R_scripts")
# made in 03_DESeqObject
res_Ax_NonAx <- readRDS("R_outputs/res_Ax_NonAx.rds")
res_Ax_YP206 <- readRDS("R_outputs/res_Ax_YP206.rds")
res_Ax_YP26 <- readRDS("R_outputs/res_Ax_YP26.rds")
dds <- readRDS("R_outputs/dds.rds") #DESeq object
res <- readRDS("R_outputs/res.rds") #result table from DESeq object
#------------subset into up/down regulated genes--------------------------------
p_threshold <- 0.05 #to get significant DEGs only
sigDEGs_Ax_NonAx_up <- subset(res_Ax_NonAx, padj < p_threshold & log2FoldChange > 0)
nrow(sigDEGs_Ax_NonAx_up) # 119
sigDEGs_Ax_NonAx_down <- subset(res_Ax_NonAx, padj < p_threshold & log2FoldChange < 0)
nrow(sigDEGs_Ax_NonAx_down) # 23
sigDEGs_Ax_YP206_up <- subset(res_Ax_YP206, padj < p_threshold & log2FoldChange > 0)
nrow(sigDEGs_Ax_YP206_up) # 171
sigDEGs_Ax_YP206_down <- subset(res_Ax_YP206, padj < p_threshold & log2FoldChange < 0)
nrow(sigDEGs_Ax_YP206_down) # 219
sigDEGs_Ax_YP26_up <- subset(res_Ax_YP26, padj < p_threshold & log2FoldChange > 0)
nrow(sigDEGs_Ax_YP26_up) # 17
sigDEGs_Ax_YP26_down <- subset(res_Ax_YP26, padj < p_threshold & log2FoldChange < 0)
nrow(sigDEGs_Ax_YP26_down) # 39
#------------generate fasta files with sig DEGs from contrasts------------------
#read fasta med mrna seqs that I used as reference earlier
mrna_fasta <- read.fasta("funannotate/Nannochloropsis_oceanica_CCAP211.mrna-transcripts.fa")
head(names(mrna_fasta))
# use function to write fasta files for all up/downregulated genes in three contrasts
# They are saved in R_outputs
# takes time to run!
source("00_functions.R")
DEGs_fasta(sigDEGs_Ax_NonAx_up)
DEGs_fasta(sigDEGs_Ax_NonAx_down)
DEGs_fasta(sigDEGs_Ax_YP206_up)
DEGs_fasta(sigDEGs_Ax_YP206_down)
DEGs_fasta(sigDEGs_Ax_YP26_up)
DEGs_fasta(sigDEGs_Ax_YP26_down)
# upload these fasta files to eggnog website:
# http://eggnog-mapper.embl.de/
# Understand eggnog COG categories: # https://www.ncbi.nlm.nih.gov/pmc/articles/PMC102395/
#-------------Group DEGs into COG categories from eggnog------------------------
# read the excel file with COG letters and their meening
COG_letters <- read.csv2("eggnog/COG_letters.csv")
#View(COG_letters)
# read the excel files generated by eggnog--------------------------------------
eggnog_Ax_NonAx_up <- read_excel("eggnog/mrna_sigDEGs_Axenic_NonAx_up.xlsx", skip=2)
# The number of DEGs lost in the eggnog analysis
nrow(sigDEGs_Ax_NonAx_up) - (nrow(eggnog_Ax_NonAx_up) - 3) #98
100-(nrow(eggnog_Ax_NonAx_up) - 3)/nrow(sigDEGs_Ax_NonAx_up)*100 #82%
eggnog_Ax_NonAx_down <- read_excel("eggnog/mrna_sigDEGs_Axenic_NonAx_down.xlsx", skip=2)
# The number of DEGs lost in the eggnog analysis
nrow(sigDEGs_Ax_NonAx_down) - (nrow(eggnog_Ax_NonAx_down) - 3) #22
100-(nrow(eggnog_Ax_NonAx_down) - 3)/nrow(sigDEGs_Ax_NonAx_down)*100 #95%
eggnog_Ax_YP26_up <- read_excel("eggnog/mrna_sigDEGs_Axenic_YP26_up.xlsx", skip=2)
nrow(sigDEGs_Ax_YP26_up) #17
# The number of DEGs lost in the eggnog analysis
# NO GENES GOT ANNOTATED! LOST 17 GENES = 100%
eggnog_Ax_YP26_down <- read_excel("eggnog/mrna_sigDEGs_Axenic_YP26_down.xlsx", skip=2)
# The number of DEGs lost in the eggnog analysis
nrow(sigDEGs_Ax_YP26_down) - (nrow(eggnog_Ax_YP26_down) - 3) #25
100-(nrow(eggnog_Ax_YP26_down) - 3)/nrow(sigDEGs_Ax_YP26_down)*100 #64%
eggnog_Ax_YP206_up <- read_excel("eggnog/mrna_sigDEGs_Axenic_YP206_up.xlsx", skip=2)
# The number of DEGs lost in the eggnog analysis
nrow(sigDEGs_Ax_YP206_up) - (nrow(eggnog_Ax_YP206_up) - 3) #146
100-(nrow(eggnog_Ax_YP206_up) - 3)/nrow(sigDEGs_Ax_YP206_up)*100#85%
eggnog_Ax_YP206_down <- read_excel("eggnog/mrna_sigDEGs_Axenic_YP206_down.xlsx", skip=2)
nrow(sigDEGs_Ax_YP206_down) - (nrow(eggnog_Ax_YP206_down) - 3) #142
100-(nrow(eggnog_Ax_YP206_down) - 3)/nrow(sigDEGs_Ax_YP206_down)*100 #65%
#prepare list as input----------------------------------------------------------
input <- list(
Ax_NonAx_up = eggnog_Ax_NonAx_up,
Ax_YP26_up = eggnog_Ax_YP26_up,
Ax_YP206_up = eggnog_Ax_YP206_up,
Ax_NonAx_down = eggnog_Ax_NonAx_down,
Ax_YP26_down = eggnog_Ax_YP26_down,
Ax_YP206_down = eggnog_Ax_YP206_down
)
# from 00_functions
source("00_functions.R")
# count all the COG letters in tables containing DEGs
COG_sample_count <- eggnog_letter_count(input)
#View(COG_sample_count)
# format for pheatmap
format_COG <- COG_sample_count[-2]
rownames(format_COG) <- format_COG$letter
format_COG <- format_COG[-1]
format_COG <- as.matrix(format_COG)
format_COG_gg <- melt(format_COG)
colnames(format_COG_gg) <- c("letter", "group", "count")
format_COG_gg$regulation <- rep(c("More expressed","Less expressed"),each=length(COG_letters$letter)*3)
#View(format_COG_gg)
#plotting
#View(format_COG_gg)
p1 <- ggplot(format_COG_gg, aes(group, letter, fill= count)) +
geom_tile()
# plotting raw counts doesnt give good colors
p2 <- p1 + #theme_classic()+
geom_raster() + labs(y = "COG category", x="") +
facet_grid(~regulation, scales = "free", space = "free")+
theme(panel.background = element_rect(fill = "white"),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
strip.background = element_blank(),
strip.text.x=element_text(size = 15, face="bold", color="black"), #strip.text.x = element_blank(), #
strip.placement = "outside", # Place facet labels outside x axis labels.
#strip.background = element_rect(fill = "white"), # Make facet label background white.
axis.title.x = element_text(size = 15, face = "bold"), axis.title.y = element_text(size = 18,face = "bold", vjust=2.5),
axis.text.x = element_text(size = 15, hjust=1, angle=45, face = "bold", color="black"), axis.text.y = element_text(size = 16, hjust = 1, color="black"),
legend.position = "right",
legend.key.size = unit(0.8, "cm"),
legend.text=element_text(size=15),
legend.title = element_text(size=15, face="bold")
) +
scale_x_discrete(labels=c("Ax_NonAx_down" = "NonAxenic", "Ax_YP26_down" = "YP26", "Ax_YP206_down" = "YP206",
"Ax_NonAx_up" = "NonAxenic", "Ax_YP26_up" = "YP26", "Ax_YP206_up" = "YP206"))
p2
#strip.background = element_rect(color="black", fill="white", size=1.5),
#strip.text.y = element_blank(),
# Make heatmaps where you adjust the color gradient so that it is not linear
m <- format_COG_gg
qn = quantile(m$count, c(0.01, 0.99), na.rm = TRUE)
qn01 <- rescale(c(qn, range(m$count)))
#heatmap3
heatmap3 <- p2 + scale_fill_gradientn(colours = colorRampPalette(c("darkblue", "red", "orange", "yellow"))(100),
values = c(0, seq(qn01[1], qn01[2], length.out = 10), 1))
heatmap3
# no. of S proteins
sum(format_COG_gg[format_COG_gg$letter == "S",]$count) #17
format_COG_gg[format_COG_gg$group == "Ax_YP206_down"& format_COG_gg$letter =="J",] #33
format_COG_gg[format_COG_gg$group == "Ax_YP206_down"& format_COG_gg$letter =="E",] #7
ggsave(
"heatmap3.pdf",
plot = heatmap3,
path = "R_outputs/",
height = 6,
width = 5,
dpi = 1000
)
# make heatmap where you log2 transform data and plot it with a linear color gradient
x <- format_COG_gg$count
format_COG_gg$count_log2 <- ifelse(x==0,0,log10(x))
#View(format_COG_gg)
#basic plot
b1 <- ggplot(format_COG_gg, aes(group, letter, fill= count_log2)) +
geom_tile()
b2 <- b1 + theme_Publication()+
geom_raster() +labs(y = "COG category", x="")+
theme(strip.text.x = element_blank(),
strip.text.y = element_blank(),
axis.title.x = element_text(size = 16), axis.title.y = element_text(size = 16),
axis.text.x = element_text(size = 16, hjust=1, angle=60), axis.text.y = element_text(size = 16, hjust = 1),
legend.key.size = unit(0.8, "cm"),
legend.text=element_text(size=17)
) +
facet_grid(~regulation, scales = "free", space = "free")
b2
heatmap4_logt <- b2 + scale_fill_gradientn(colours = c("darkblue", "red", "orange", "yellow"))
ggsave(
"heatmap4_logt.pdf",
plot = heatmap4_logt,
path = "R_outputs/",
height = 7,
width = 7,
dpi = 1000
)