-
Notifications
You must be signed in to change notification settings - Fork 0
/
06_ExperimentHub.R
102 lines (60 loc) · 2.39 KB
/
06_ExperimentHub.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
#####################################################
# LC snRNA-seq analyses: SCE object for ExperimentHub
# Lukas Weber, Oct 2022
#####################################################
library(here)
library(SingleCellExperiment)
# ----------------
# Load SCE objects
# ----------------
# load SCE objects from previous script
# SCE object without gene filtering
fn <- here("processed_data", "SCE", "sce_logcounts")
sce_logcounts <- readRDS(paste0(fn, ".rds"))
dim(sce_logcounts)
# SCE object containing cluster labels
fn <- here("processed_data", "SCE", "sce_clustering_secondary")
sce_clustering_secondary <- readRDS(paste0(fn, ".rds"))
dim(sce_clustering_secondary)
# ------------
# Remove zeros
# ------------
# remove genes with zero expression
ix_zeros <- rowSums(counts(sce_logcounts)) == 0
table(ix_zeros)
dim(sce_logcounts)
sce_logcounts <- sce_logcounts[!ix_zeros, ]
dim(sce_logcounts)
# check no new zeros introduced
table(rowSums(counts(sce_logcounts)) == 0, useNA = "always")
table(colSums(counts(sce_logcounts)) == 0, useNA = "always")
# -----------------------------------
# Create SCE object for ExperimentHub
# -----------------------------------
# select columns of colData and store cluster labels in SCE object without gene filtering
stopifnot(ncol(sce_logcounts) == ncol(sce_clustering_secondary))
stopifnot(all(colnames(sce_logcounts) == colnames(sce_clustering_secondary)))
stopifnot(all(colData(sce_logcounts)$Sample == colData(sce_clustering_secondary)$Sample))
stopifnot(all(colData(sce_logcounts)$Barcode == colData(sce_clustering_secondary)$Barcode))
# select and re-order columns in colData to keep
cols_keep_logcounts <- c(1:2, 8:12, 14:15)
colnames(colData(sce_logcounts))[cols_keep_logcounts]
cols_keep_clustering_secondary <- c(18, 16, 21, 22, 19, 20, 17)
colnames(colData(sce_clustering_secondary))[cols_keep_clustering_secondary]
# store combined colData
colData(sce_logcounts) <- cbind(
colData(sce_logcounts)[, cols_keep_logcounts],
colData(sce_clustering_secondary)[, cols_keep_clustering_secondary])
# check
head(colData(sce_logcounts), 2)
# ------------
# Column names
# ------------
# use key_id as column names
colnames(sce_logcounts) <- colData(sce_logcounts)$Key
# -----------
# Save object
# -----------
# save SCE object for ExperimentHub
fn_out <- here("processed_data", "SCE", "LC_singleNucleus_SCE_EHub")
saveRDS(sce_logcounts, paste0(fn_out, ".rds"))