-
Notifications
You must be signed in to change notification settings - Fork 1
/
Diff_Heatmap.r
211 lines (196 loc) · 11.3 KB
/
Diff_Heatmap.r
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
library(stringr)
library(ggplot2)
library(tidyr)
library(dplyr)
library(hexbin)
library(viridis)
library(patchwork)
library(pheatmap)
library(RColorBrewer)
library(ComplexHeatmap)
library(circlize)
library(ggpubr)
library(wesanderson)
library(corrplot)
library(Hmisc)
library(ggrepel)
# path = "D:\\Dropbox (Weizmann Institute)\\Aaron Javitt\\PSME4_PaperDraft\\PSME4 Figures\\Figure2_MAPPQC\\"
path = "C:\\Users\\ajavi\\Dropbox (Weizmann Institute)\\PSME4\\PSME4 Figures\\Figure2_MAPPQC\\"
Proteins_noImp = read.csv(paste0(path,"060219_1_Analysis\\060219_1_DataSnapshots\\060219_1_D_ProteinsAfterFilteringNonValidProteins.txt"),sep = "\t", header = 1,stringsAsFactors = FALSE)
diffProteinTable = Proteins_sub1[unlist(diffProteins),c(Tumor_col,Adj_col)]
rownames(diffProteinTable) = unlist(diffProteins)
rownames(names_frame) = names_frame$original
colnames(diffProteinTable) = names_frame[c(Tumor_col,Adj_col),"cleanColname"]
col_fun = colorRamp2(c(0,10), c("white", "darkred"))
diffProteinTable[is.na(diffProteinTable)] = 0
abun_col = colnames(Proteins_noImp)[198:213]
cleanColname = str_sub(abun_col,start = 39,end = -9)
names_frame <- as.data.frame(cleanColname)
names_frame$original = abun_col
names_frame$Sample = factor(str_sub(names_frame$cleanColname ,start = 1,end = 1))
names_frame$Sample = recode(names_frame$Sample,N = "Adjacent",T = "Tumor")
names_frame$ID = str_sub(names_frame$cleanColname,start = 3)
Tumor_col = names_frame[names_frame$Sample== "Tumor","original"]
Adj_col= names_frame[names_frame$Sample== "Adjacent","original"]
Proteins_sub = Proteins_noImp[,c("First.Gene.Name",Tumor_col,Adj_col)]
Proteins_sub$T_count = apply(Proteins_noImp[,Tumor_col], 1 ,function(x) 8 - sum(is.na(x)))
Proteins_sub$A_count = apply(Proteins_noImp[,Adj_col], 1 ,function(x) 8 - sum(is.na(x)))
Proteins_sub$dif_count = Proteins_sub$T_count - Proteins_sub$A_count
Proteins_sub1 = Proteins_sub[!str_detect(Proteins_sub$First.Gene.Name,"PSM") & !str_detect(Proteins_sub$First.Gene.Name,"IGH") & !str_detect(Proteins_sub$First.Gene.Name,"IGK"),]
Proteins_sub1 = Proteins_sub1[order(Proteins_sub1$dif_count),]
Proteins_sub1$First.Gene.Name = factor(Proteins_sub1$First.Gene.Name,levels = (Proteins_sub1[order(Proteins_sub1$dif_count),"First.Gene.Name"]))
Proteins_sub1$ind = 1:nrow(Proteins_sub1)
diffProteins = Proteins_sub1 %>% filter(abs(dif_count) >=5) %>% select(First.Gene.Name)
#TCGA
TCGA = read.csv(paste0(path,"TCGA_MAPPdifferentials.csv",sep = ""),row.names = 1)
TCGA_sub = TCGA %>% filter(sample_type %in% c("Primary Tumor", "Solid Tissue Normal"))
TCGA_annot = TCGA_sub %>% select(sample_type)
TCGA_numeric = TCGA_sub %>% select(CDH5:DENR) %>% t() %>% data.frame()
TCGA_numeric_norm = data.frame(t(scale(t(TCGA_numeric))))
TCGA_numeric_norm = TCGA_numeric_norm[as.character(diffProteins$First.Gene.Name),]
rownames(TCGA_numeric_norm) = as.character(diffProteins$First.Gene.Name)
TCGA_annot_map = HeatmapAnnotation(df = data.frame(Sample = TCGA_annot$sample_type),
col = list(Sample = c("Solid Tissue Normal" = "skyblue3","Primary Tumor" = "darkorange3")),
annotation_name_gp = element_blank())
DIFF_TCGA = Heatmap(as.matrix(TCGA_numeric_norm),
show_row_names = TRUE,
show_column_names = FALSE,
show_row_dend = FALSE,
show_column_dend = FALSE,
cluster_columns = TRUE,
cluster_rows = TRUE,
name = "Normalized RPKM",
na_col = "lightgrey",
top_annotation = HeatmapAnnotation(foo = anno_block(gp = gpar(fill = c("skyblue3","darkorange3")))),
column_split = TCGA_annot$sample_type,
row_names_gp = gpar(fontsize = 8),
column_title = "TCGA LUAD",
height = 8,
width = 10)
#CPTAC
ProtData = read.csv(paste(path,"alldata_publicproteomeLC.csv",sep = ""),row.names = 1)
SampData = read.csv(paste(path,"sampleData.csv",sep = ""))
select_col = str_detect(colnames(ProtData),"Unshared")
Prot_data_sub = ProtData[-3:-1,select_col]
select_col1 = (str_detect(colnames(Prot_data_sub),"CPT"))
Prot_data_sub1 = Prot_data_sub[,select_col1]
new_names = str_split(colnames(Prot_data_sub1),"\\.",simplify = TRUE)[,1]
colnames(Prot_data_sub1) = new_names
Prot_data_sub1_DIFF = Prot_data_sub1[as.character(diffProteins$First.Gene.Name),]
rownames(Prot_data_sub1_DIFF) = as.character(diffProteins$First.Gene.Name)
ColType_cptac = SampData %>% filter(Aliquot..Specimen.Label. %in% colnames(Prot_data_sub1_DIFF)) %>% select(Aliquot..Specimen.Label.,Type) %>% tibble::deframe()
CPTAC_annot = HeatmapAnnotation(df = data.frame(Sample = ColType_cptac),
col = list(Sample = c("Normal" = "skyblue3","Tumor" = "darkorange3")),
annotation_name_gp = element_blank())
DIFF_CPTAC = Heatmap(as.matrix(Prot_data_sub1_DIFF),
show_row_names = TRUE,
show_column_names = FALSE,
show_row_dend = FALSE,
show_column_dend = FALSE,
cluster_columns = TRUE,
cluster_rows = TRUE,
name = "CPTAC Protein Abundance",
na_col = "lightgrey",
top_annotation = HeatmapAnnotation(foo = anno_block(gp = gpar(fill = c("skyblue3","darkorange3")))),
column_split = ColType_cptac,
row_names_gp = gpar(fontsize = 8),
column_title = "CPTAC Proteomics",
height = 8,
width = 10)
draw(DIFF_CPTAC)
#MAPP
MAPPdata = read.csv(paste0(path,"060219_1_Analysis\\060219_1_FilteredData\\060219_1_Lung-cancer-midgam_proteins.txt"),sep = "\t", header = 1,stringsAsFactors = FALSE)
MAPPdata_sub = MAPPdata %>%select(First.Gene.Name,N.107.Log10.median:N.76.Log10.median,T.107.Log10.median:T.76.Log10.median)
rownames(MAPPdata_sub) = MAPPdata_sub$First.Gene.Name
MAPPdata_sub = MAPPdata_sub[,-1]
MAPPdata_sub_diff = MAPPdata_sub[as.character(diffProteins$First.Gene.Name),]
MAPPdata_sub_diff_norm = data.frame(t(scale(t(MAPPdata_sub_diff))))
colnames(MAPPdata_sub_diff_norm) = str_sub(colnames(MAPPdata_sub_diff_norm),end = -14)
sample_annot = HeatmapAnnotation(df = data.frame(Sample = c(rep("Adjacent",8),rep("Tumor",8))),
col = list(Sample = c("Adjacent" = "skyblue3","Tumor" = "darkorange3")))
MAPP_diff = Heatmap(MAPPdata_sub_diff_norm,
show_row_names = TRUE,
show_column_names = FALSE,
show_row_dend = FALSE,
show_column_dend = FALSE,
cluster_columns = TRUE,
cluster_rows = TRUE,
name = "MAPP intensity",
na_col = "lightgrey",
top_annotation = HeatmapAnnotation(foo = anno_block(gp = gpar(fill = c("skyblue3","darkorange3")))),
row_names_gp = gpar(fontsize = 8),
column_split = c(rep("Adjacent",8),rep("Tumor",8)),
column_title = "MAPP",
height = 16,
width = 8)
## 1D
Protdata = read.csv(paste0(path,"110319_1_Analysis-1D\\110319_1_FilteredData\\110319_1_Lung-Midgam-1D_proteins.txt"),sep = "\t", header = 1,stringsAsFactors = FALSE)
Protdata_sub = Protdata %>%select(First.Gene.Name,N.107.Log10.median:N.76.Log10.median,T.107.Log10.median:T.76.Log10.median) %>% filter(First.Gene.Name %in% as.character(diffProteins$First.Gene.Name))
rownames(Protdata_sub) = Protdata_sub$First.Gene.Name
Protdata_sub = Protdata_sub[,-1]
Protdata_sub_diff = Protdata_sub[as.character(diffProteins$First.Gene.Name),]
Protdata_sub_diff_norm = data.frame(t(scale(t(Protdata_sub_diff))))
colnames(Protdata_sub_diff_norm) = str_sub(colnames(Protdata_sub_diff_norm),end = -14)
Prot_annot = HeatmapAnnotation(df = data.frame(Sample = c(rep("Adjacent",8),rep("Tumor",8))),
col = list(Sample = c("Adjacent" = "skyblue3","Tumor" = "darkorange3")),annotation_name_gp = element_blank())
Prot_diff = Heatmap(Protdata_sub_diff_norm,
show_row_names = TRUE,
show_column_names = FALSE,
show_row_dend = FALSE,
show_column_dend = FALSE,
cluster_columns = TRUE,
cluster_rows = TRUE,
name = "WCE Proteomics Protein Abundance",
na_col = "lightgrey",
top_annotation = HeatmapAnnotation(foo = anno_block(gp = gpar(fill = c("skyblue3","darkorange3")))),
row_names_gp = gpar(fontsize = 8),
column_split = c(rep("Adjacent",8),rep("Tumor",8)),
column_title = "WCE Proteomics",
height = 10,
width = 8)
pdf(paste0(path,"heatmapDifferentials_combined_TCGA.pdf"),width = 12)
draw(MAPP_diff + Prot_diff + DIFF_CPTAC + DIFF_TCGA)
dev.off()
#ttest_RANK
Protdata_ALL = Protdata %>%select(First.Gene.Name,N.107.Log10.median:N.76.Log10.median,T.107.Log10.median:T.76.Log10.median)
Protdata_ALL[,2:17] = 10^Protdata_ALL[,2:17]
rownames(Protdata_ALL) = make.unique(Protdata_ALL$First.Gene.Name)
TTEST_RESULTS = as.data.frame(matrix(nrow = nrow(Protdata_ALL),ncol = 4))
colnames(TTEST_RESULTS) = c("Gene","Mean-Adj","Mean-Tum","pval")
rownames(TTEST_RESULTS) = rownames(Protdata_ALL)
for (gene in rownames(Protdata_ALL)){
TTEST_RESULTS[gene,"Gene"] = Protdata_ALL[gene,"First.Gene.Name"]
TTEST_RESULTS[gene,"Mean-Adj"] = rowMeans(Protdata_ALL[gene,c(2:9)])
TTEST_RESULTS[gene,"Mean-Tum"] = rowMeans(Protdata_ALL[gene,c(10:17)])
test = t.test(x = Protdata_ALL[gene,c(2:9)],y = Protdata_ALL[gene,c(10:17)])
TTEST_RESULTS[gene,"pval"] = test$p.value
}
TTEST_RESULTS$adjP = p.adjust(TTEST_RESULTS$pval,method = "BH")
TTEST_RESULTS$FC = log2(TTEST_RESULTS$`Mean-Tum`/TTEST_RESULTS$`Mean-Adj`)
TTEST_RESULTS$DIFF = "NO"
TTEST_RESULTS[TTEST_RESULTS$Gene %in% as.character(diffProteins$First.Gene.Name),"DIFF"] = "Differential MAPP"
TTEST_RESULTS$SIZE = ifelse(TTEST_RESULTS$DIFF == "Differential MAPP",.02,.01)
TTEST_RESULTS %>%
arrange(SIZE) %>%
ggplot() +
geom_point(aes(x = FC,y = -log10(adjP),color = DIFF))+
geom_hline(yintercept = 1.3)+
geom_vline(xintercept = -1) +
geom_vline(xintercept = 1)+
theme(axis.text.y = element_text(size=10),
axis.text.x = element_text(size=10),
axis.title.y = element_text(size=11,face = "bold"),
axis.title.x = element_text(size=11,face = "bold"),
legend.text = element_text(size=8),
legend.position = "right",
legend.background = element_rect(fill = rgb(1, 1, 1, 0)),
panel.background = element_blank(),
panel.grid.major = element_blank(),
panel.grid.minor = element_blank(),
axis.line = element_line(colour = "black"),
aspect.ratio = 1,
panel.border = element_rect(colour = "black", fill=NA, size=.5))+
scale_color_manual(values = c("darkred","lightgrey")) +
xlab(paste("Log2 Fold Change Tumor/Adjacent")) + ylab("-Log10(Pvalue)")
ggsave(filename = paste0(path,"diffVolcano_MAPP.pdf"),height = 3)
write.csv(diffProteins,file = paste0(path,"MAPP_differentials.csv"))