-
Notifications
You must be signed in to change notification settings - Fork 2
/
examine_batch_correction.R
365 lines (322 loc) · 18.4 KB
/
examine_batch_correction.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
library(dplyr)
library(lisi)
library(patchwork)
# Setup ----
# Every relative path below is resolved against this directory, which holds
# the "no batch correction" embeddings.
# NOTE(review): ggplot2, ggpubr (theme_classic2, stat_compare_means) and ggsci
# (scale_fill_d3) functions are used further down but are not attached by this
# file -- presumably the interactive session or the sourced palette script
# provides them; confirm before running non-interactively.
setwd("/data1/niehu/ibd_public_data_20210821/analysis_20220111/02.integrate/combine_clean_V3/integrate_without_batch_correction/")

# load color
# Two tab-separated columns without a header: cluster name, colour.
# comment.char is moved away from the default "#", presumably so that hex
# colour codes are not cut off as comments.
# NOTE(review): the file is called major_cluster_color.txt but is indexed by
# minor-cluster names below -- confirm this is the intended table.
color.minor_cluster <- read.table(
  "../color_set/major_cluster_color.txt",
  header = FALSE, stringsAsFactors = FALSE, sep = "\t", comment.char = "!"
)
colnames(color.minor_cluster) <- c("minor_cluster", "color")
color.minor_cluster %>% head
# Index the table by cluster name; only the `color` column remains afterwards.
rownames(color.minor_cluster) <- color.minor_cluster$minor_cluster
color.minor_cluster$minor_cluster <- NULL

# load meta
meta <- readRDS("../update_cell_type/all.clean.update_metadata.rds")
colnames(meta)
minor_clusters <- levels(meta$minor_cluster)
studies <- levels(meta$study)

# plot umap
# The sourced script is expected to define `color_palette_89`.
source("/data1/niehu/soft/script/color_palette_89.R")
# One colour per study, picked at evenly spaced positions of the palette.
# Both the number of colours and their names come from `studies`, so the
# vector is always consistently named and no `seu` object is required (its
# load below is commented out).
color.study <- color_palette_89[
  as.integer(seq(
    from = 1, to = length(color_palette_89),
    length.out = length(studies)
  ))
]
names(color.study) <- studies

# load seu
#seu = readRDS("../update_cell_type/all.clean.raw.rds")
# UMAP comparison figures ----
# For each cell group two files are written:
#   <prefix>.png       2 x 2 grid: left column uses UMAP_1/UMAP_2 from the
#                      *.no_batch_correction.obs.csv table, right column uses
#                      the UMAP_1/UMAP_2 stored in `meta` for the same cells;
#                      top row coloured by study, bottom row by minor cluster.
#   <prefix>_blank.pdf the same two colourings drawn with fully transparent
#                      points, so that only axes and legends are visible.
# Relies on the globals `meta`, `minor_clusters`, `studies`,
# `color.minor_cluster` and `color.study` created earlier in the script.

# Text styling shared by every panel; only the legend position varies.
umap_theme <- function(legend_position) {
  theme(
    legend.position = legend_position,
    axis.text.x = element_text(family = "ArialMT", size = 16, color = "black"),
    # y tick labels deliberately keep family = "" (device default), exactly as
    # in the original panels.
    axis.text.y = element_text(family = "", size = 16, color = "black"),
    axis.title.y = element_text(family = "ArialMT", size = 16, color = "black"),
    axis.title.x = element_text(family = "ArialMT", size = 16, color = "black"),
    legend.text = element_text(family = "ArialMT", size = 12, color = "black"),
    legend.title = element_text(family = "ArialMT", size = 12, color = "black"),
    strip.text = element_text(
      family = "ArialMT", face = "plain", size = 16, color = "black"
    ),
    strip.background = element_blank(),
    plot.title = element_text(
      family = "ArialMT", size = 18, color = "black", hjust = 0.5
    )
  )
}

# One UMAP scatter plot.
#   data     data frame with the columns referenced by `mapping`
#   mapping  aes() giving x, y and the colour variable
#   palette  named colour vector passed to scale_color_manual()
#   legend   FALSE: visible points, no legend, fixed aspect ratio
#            TRUE:  transparent points, legend on the right, free aspect ratio
umap_panel <- function(data, mapping, palette, legend = FALSE) {
  panel <- ggplot(data, mapping) +
    geom_point(size = 0.1, alpha = if (legend) 0 else 0.7) +
    scale_color_manual(values = palette) +
    # Legend keys are drawn opaque and enlarged; the key size only matters for
    # the legend-only panels because the others hide their legend.
    guides(colour = guide_legend(
      override.aes = list(size = if (legend) 2 else 3, alpha = 1)
    )) +
    theme_classic2() +
    umap_theme(if (legend) "right" else "none")
  if (!legend) {
    panel <- panel + coord_fixed()
  }
  panel
}

# Build and save both figures for one cell group.
#   obs_file  CSV with cell IDs in the first column and at least the columns
#             UMAP_1, UMAP_2, study and minor_cluster
#   prefix    output file name stem ("cd8t" -> cd8t.png, cd8t_blank.pdf)
# Returns NULL invisibly; called for its side effect of writing files.
plot_batch_umaps <- function(obs_file, prefix) {
  obs <- read.table(
    obs_file,
    header = TRUE, sep = ",", stringsAsFactors = FALSE, row.names = 1
  )

  # Keep the global cluster ordering, restricted to clusters present here.
  present_clusters <- minor_clusters[minor_clusters %in% obs$minor_cluster]
  obs$minor_cluster <- factor(obs$minor_cluster, levels = present_clusters)
  obs$study <- factor(obs$study, levels = studies)

  # Named palettes: scale_color_manual() then matches colours by level name
  # instead of by position, which keeps colours stable even if the two data
  # sources do not expose exactly the same set of levels.
  cluster_colors <- setNames(
    color.minor_cluster[present_clusters, "color"],
    present_clusters
  )
  study_colors <- color.study[levels(obs$study)]

  # Same cells, coordinates and annotations taken from the metadata table.
  unknown_cells <- setdiff(rownames(obs), rownames(meta))
  if (length(unknown_cells) > 0) {
    warning(
      length(unknown_cells), " cell(s) in ", obs_file,
      " are not present in `meta` and will be missing from its panels",
      call. = FALSE
    )
  }
  after <- meta[rownames(obs), ]

  by_study <- aes(x = UMAP_1, y = UMAP_2, color = study)
  by_cluster <- aes(x = UMAP_1, y = UMAP_2, color = minor_cluster)

  #ggsave(paste0(cluster,"_umap.png"), width = 5, height = 5, device = "png",)
  overview <-
    (umap_panel(obs, by_study, study_colors) |
      umap_panel(after, by_study, study_colors)) /
    (umap_panel(obs, by_cluster, cluster_colors) |
      umap_panel(after, by_cluster, cluster_colors))
  ggsave(
    filename = paste0(prefix, ".png"),
    width = 8.5, height = 4.5, dpi = 600, plot = overview, device = "png"
  )

  legends <-
    umap_panel(obs, by_study, study_colors, legend = TRUE) |
    umap_panel(after, by_cluster, cluster_colors, legend = TRUE)
  ggsave(
    filename = paste0(prefix, "_blank.pdf"),
    width = 8, height = 3.5, dpi = 600, plot = legends, device = "pdf"
  )

  invisible(NULL)
}

# CD8T
if (TRUE) {
  plot_batch_umaps("./CD8T.no_batch_correction.obs.csv", prefix = "cd8t")
}
# Epi
if (TRUE) {
  plot_batch_umaps("./Epi.no_batch_correction.obs.csv", prefix = "epi")
}
# LISI
# Computes LISI for the `study` label on two sets of 2-D coordinates per cell
# group: UMAP_1/UMAP_2 from the no-batch-correction obs table ("before") and
# UMAP_1/UMAP_2 stored in `meta` for the same cells ("after"). The scores are
# drawn as box plots (batch.pdf) and compared with Wilcoxon tests.
if (TRUE) {
  # Before/after LISI scores for one cell group.
  #   obs_file  CSV with cell IDs in the first column and the columns
  #             UMAP_1, UMAP_2 and study
  #   group     label stored in the `group` column of the result
  # Returns a data frame with one row per cell: before, after, group.
  lisi_before_after <- function(obs_file, group) {
    obs <- read.table(
      obs_file,
      header = TRUE, sep = ",", stringsAsFactors = FALSE, row.names = 1
    )
    after <- meta[rownames(obs), c("UMAP_1", "UMAP_2", "study")]
    scores <- cbind(
      compute_lisi(obs[, c("UMAP_1", "UMAP_2")], obs, c("study")),
      compute_lisi(after[, c("UMAP_1", "UMAP_2")], after, c("study"))
    )
    colnames(scores) <- c("before", "after")
    scores$group <- group
    scores
  }

  res <- rbind(
    lisi_before_after("./CD8T.no_batch_correction.obs.csv", "CD8T"),
    lisi_before_after("./Epi.no_batch_correction.obs.csv", "Epithelial")
  )
  # Long format: columns group, variable (before/after), value (LISI).
  # id.vars is given explicitly instead of letting melt() guess it.
  res <- reshape2::melt(res, id.vars = "group")
  print(head(res))

  # summary number of cells per group
  # Printed explicitly: values inside a { } block are not auto-printed.
  print(res %>% group_by(group, variable) %>% count()) # 184220, 195735

  # plot
  my_comparisons <- list(c("before", "after"))
  lisi_plot <- ggplot(res, aes(x = variable, y = value, fill = variable)) +
    facet_grid(~group) +
    geom_boxplot(outlier.size = 0.01) +
    labs(x = "", y = "LISI", title = "") +
    scale_fill_d3(alpha = 0.6, palette = "category20") +
    theme_classic2() +
    theme(
      legend.position = "none",
      axis.text.x = element_text(
        family = "ArialMT", size = 16, color = "black", angle = 45, hjust = 1
      ),
      #axis.text.x = element_text(family = "ArialMT", size = 16, color = "black"),
      axis.text.y = element_text(family = "", size = 16, color = "black"),
      axis.title.y = element_text(family = "ArialMT", size = 16, color = "black"),
      axis.title.x = element_text(family = "ArialMT", size = 16, color = "black"),
      legend.text = element_text(family = "ArialMT", size = 12, color = "black"),
      legend.title = element_text(family = "ArialMT", size = 12, color = "black"),
      strip.text = element_text(
        family = "ArialMT", face = "plain", size = 16, color = "black"
      ),
      strip.background = element_blank(),
      plot.title = element_text(
        family = "ArialMT", size = 18, color = "black", hjust = 0.5
      )
    ) +
    stat_compare_means(
      comparisons = my_comparisons,
      method = "wilcox.test"
    ) #label = "p.signif")
  # Pass the plot explicitly rather than depending on last_plot().
  ggsave("batch.pdf", plot = lisi_plot, width = 3, height = 4)

  # Wilcoxon test of before vs. after within each group. Each result is
  # printed explicitly so that none is dropped when the block is run as a whole.
  # NOTE(review): before/after scores belong to the same cells but are compared
  # as two independent samples here (and in stat_compare_means above), as in
  # the original analysis -- confirm whether a paired test is intended.
  for (grp in c("Epithelial", "CD8T")) {
    in_group <- res$group == grp
    cat("Wilcoxon test, before vs after:", grp, "\n")
    print(wilcox.test(
      res$value[in_group & res$variable == "before"],
      res$value[in_group & res$variable == "after"]
    ))
  }
}