In [1]:
Sys.setenv(RETICULATE_PYTHON = "/data1/lesliec/tyler/utils/miniforge3/envs/multiome/bin/python")
setwd("~/0-workspace/CCR7_DC/oral-tolerance-Gardner/")

suppressPackageStartupMessages({
  library(Seurat)
  library(ArchR)
  library(Rmagic)
  library(anndata)
  library(Matrix)
  library(ggplot2)
  library(ggrepel)
  library(cowplot)
  library(ComplexHeatmap)
  library(circlize)
  library(dplyr)
  library(rasterpdf)
  library(viridis)
  library(future)
})
set.seed(1)
plan("multicore", workers = 16)
options(future.globals.maxSize = Inf)

source("utils.R")

pal <- list(
  Clusters = c(
    "#5fed0e", "#489de8", "#d40663", "#d1c50f", "#077315",
    "#e67109", "#785cd4", "#260691", "#9e7d3f", "#bd537a",
    "#49709c", "#aebeff", "#9c2903", "#9c6fa8", "#827c68",
    "#062e0b", "#1ee3c5"
  ),
  Clusters_long = c(
    "#000000", "#FAD09F", "#1CE6FF", "#FF34FF", "#FF4A46", "#008941", "#006FA6", "#A30059",
    "#FFDBE5", "#7A4900", "#0000A6", "#63FFAC", "#B79762", "#004D43", "#8FB0FF", "#997D87",
    "#5A0007", "#809693", "#1B4400", "#4FC601", "#3B5DFF", "#4A3B53", "#FF2F80",
    "#61615A", "#BA0900", "#6B7900", "#00C2A0", "#FFAA92", "#FF90C9", "#B903AA", "#D16100",
    "#DDEFFF", "#000035", "#7B4F4B", "#A1C299", "#300018", "#0AA6D8", "#013349", "#00846F",
    "#372101", "#FFFF00", "#C2FFED", "#A079BF", "#CC0744", "#C0B9B2", "#C2FF99", "#001E09",
    "#00489C", "#6F0062", "#0CBD66", "#EEC3FF", "#456D75", "#B77B68", "#7A87A1", "#788D66",
    "#885578", "#FAD09F", "#FF8A9A", "#D157A0", "#BEC459", "#456648", "#0086ED", "#886F4C",
    "#34362D", "#B4A8BD", "#00A6AA", "#452C2C", "#636375", "#A3C8C9", "#FF913F", "#938A81",
    "#575329", "#00FECF", "#B05B6F", "#8CD0FF", "#3B9700", "#04F757", "#C8A1A1", "#1E6E00",
    "#7900D7", "#A77500", "#6367A9", "#A05837", "#6B002C", "#772600", "#D790FF", "#9B9700",
    "#549E79", "#FFF69F", "#201625", "#72418F", "#BC23FF", "#99ADC0", "#3A2465", "#922329",
    "#5B4534", "#FDE8DC", "#404E55", "#0089A3", "#CB7E98", "#A4E804", "#324E72", "#6A3A4C")
)
dir.create('plots')
saveRDS(pal, file = "plots/palette.rds")
pal <- readRDS(file = "plots/palette.rds")


                                                   / |
                                                 /    \
            .                                  /      |.
            \\\                              /        |.
              \\\                          /           `|.
                \\\                      /              |.
                  \                    /                |\
                  \\#####\           /                  ||
                ==###########>      /                   ||
                 \\##==......\    /                     ||
            ______ =       =|__ /__                     ||      \\\
       \               '        ##_______ _____ ,--,__,=##,__   ///
        ,    __==    ___,-,__,--'#'  ==='      `-'    | ##,-/
        -,____,---'       \\####\\________________,--\\_##,/
           ___      .______        ______  __    __  .______      
          /   \     |   _  \      /      ||  |  |  | |   _  \     
         /  ^  \    |  |_) 

# Load data

In [2]:
adata.e <- read_h5ad("data/GSE285182_early_life_int.h5ad")
adata.a <- read_h5ad("data/GSE273746_named_int_td_adata_OG_FINAL.h5ad")

In [7]:
write.csv(adata.e$obs, "data/GSE285182_adata_obs.csv", quote = F)
write.csv(adata.a$obs, "data/GSE273746_adata_obs.csv", quote = F)

In [9]:
# 1. Load data ####

pref.sro <- 'Seurat/'; pref.p.sro <- 'plots/Seurat/'
dir.create(pref.sro); dir.create(pref.p.sro)

In [14]:
data.folder <- "data/"
samples <- c(adult_LN = "GSE273746_RAW-Adult-RLT/GSM8436371_tomato_ln", 
             adult_mLN = "GSE273746_RAW-Adult-RLT/GSM8436372_tomato_mln", 
             adult_spleen = "GSE273746_RAW-Adult-RLT/GSM8436373_tomato_sp", 
             early_LN = "GSE285182_RAW-Early-Life-RLT/GSM8697645_early_life_ln",
             early_mLN = "GSE285182_RAW-Early-Life-RLT/GSM8697646_early_life_mln",
             early_spleen = "GSE285182_RAW-Early-Life-RLT/GSM8697647_early_life_sp"
            )

In [15]:
sro.list <- lapply(1:length(samples), function(i){
  counts <- Read10X_h5(paste0(data.folder, samples[i], "_filtered_feature_bc_matrix.h5"))
  sro <- CreateSeuratObject(
    project = names(samples)[i],
    counts = counts
  )
  return(list(sro = sro))
})

In [17]:
sro.a <- merge(sro.list[[1]]$sro, y = sapply(sro.list,"[[",1)[2:3], 
             add.cell.ids = names(samples)[1:3], project = "Gardner.adult")

sro.e <- merge(sro.list[[4]]$sro, y = sapply(sro.list,"[[",1)[5:6], 
             add.cell.ids = names(samples)[4:6], project = "Gardner.early")

In [20]:
sro.a$sample <- sro.a$orig.ident
sro.a$tissue <- stringr::str_split_i(sro.a$sample, "_", i=2)
sro.a$age <- stringr::str_split_i(sro.a$sample, "_", i=1)
sro.a <- PercentageFeatureSet(sro.a, pattern = "^mt-", col.name = "MtFrac_RNA")

sro.e$sample <- sro.e$orig.ident
sro.e$tissue <- stringr::str_split_i(sro.e$sample, "_", i=2)
sro.e$age <- stringr::str_split_i(sro.e$sample, "_", i=1)
sro.e <- PercentageFeatureSet(sro.e, pattern = "^mt-", col.name = "MtFrac_RNA")

Adult RLT preprocessing
Cells were filtered out if they met any of the following criteria: 
- \> 5% mitochondrial reads,
- \> 8,000 unique genes, and \> 60,000 total reads.
- The total cell counts sequenced are as follows: 20,629 (LN), 4,609 (mLN), 14,847 (spleen). The total cells postquality-control (QC) are as follows: 19,642 (LN), 4,135 (mLN), 13,222 (spleen).

Early-life RLT preprocessing
Cells were filtered out if they met any of the following criteria: 
- \>5% mitochondrial reads,
- \>5,500 unique genes, and \>35,000 total reads.
- The total cell counts sequenced are as follows: 17,975 (LN), 13,482 (mLN), 27,642 (spleen). The total cells post-QC are as follows: 14,682 (LN), 11,428 (mLN), 22,408 (spleen).

In [28]:
thr.a <- data.frame(nc.max = 60000, nf.max = 8000, 
                  mp.max = 5)

thr.e <- data.frame(nc.max = 35000, nf.max = 5500, 
                  mp.max = 5)

In [25]:
17975+13482+27642

In [26]:
14682+11428+22408

In [29]:
cell.discard.a <- sro.a$nCount_RNA > thr.a$nc.max |
  sro.a$nFeature_RNA > thr.a$nf.max |
  sro.a$MtFrac_RNA > thr.a$mp.max
table(cell.discard.a)

cell.discard.e <- sro.e$nCount_RNA > thr.e$nc.max |
  sro.e$nFeature_RNA > thr.e$nf.max |
  sro.e$MtFrac_RNA > thr.e$mp.max
table(cell.discard.e)

# cell.discard.a
# FALSE  TRUE 
# 38409  1676 
# cell.discard.e
# FALSE  TRUE 
# 52351  6748 

cell.discard.a
FALSE  TRUE 
38409  1676 

cell.discard.e
FALSE  TRUE 
52351  6748 

In [60]:
Idents(sro.a) <- sro.a$sample
dir.create("plots/Seurat/QC", recursive = T)
pdf("plots/Seurat/QC/violin-adult-sample-RNA-QC.pdf", width = 8, height = 6)
plot.all.QC(sro.a, ident = "sample", thr = thr.a)
dev.off()

“'plots/Seurat/QC' already exists”


[[1]]

[[2]]

[[3]]

[[4]]

[[5]]


In [61]:
Idents(sro.e) <- sro.e$sample
dir.create("plots/Seurat/QC", recursive = T)
pdf("plots/Seurat/QC/violin-early-sample-RNA-QC.pdf", width = 8, height = 6)
plot.all.QC(sro.e, ident = "sample", thr = thr.e)
dev.off()

“'plots/Seurat/QC' already exists”


[[1]]

[[2]]

[[3]]

[[4]]

[[5]]


In [63]:
# sro <- sro[, !cell.discard] # not run here
write.csv(sro.a@meta.data, file = "Seurat/adult-initial-meta-data.csv")
saveRDS(sro.a, file = "Seurat/adult-initial-SRO.rds")

write.csv(sro.e@meta.data, file = "Seurat/early-initial-meta-data.csv")
saveRDS(sro.e, file = "Seurat/early-initial-SRO.rds")

# RNA analysis

In [4]:
pref.sro.a <- 'Seurat/adult/'; pref.p.sro.a <- 'plots/Seurat/adult/'
dir.create(pref.sro.a); dir.create(pref.p.sro.a)
pref.sro.e <- 'Seurat/early/'; pref.p.sro.e <- 'plots/Seurat/early/'
dir.create(pref.sro.e); dir.create(pref.p.sro.e)

“'Seurat/adult' already exists”
“'plots/Seurat/adult' already exists”
“'Seurat/early' already exists”
“'plots/Seurat/early' already exists”


In [None]:
# # 2. RNA analysis ####
# sro.a <- readRDS(paste0('Seurat/adult-initial-SRO.rds'))
# sro.a <- sro.a[, !cell.discard.a]

# sro.e <- readRDS(paste0('Seurat/early-initial-SRO.rds'))
# sro.e <- sro.e[, !cell.discard.e]

In [98]:
obs.a <- read.csv("data/GSE273746_adata_obs.csv", row.names = 1)
obs.e <- read.csv("data/GSE285182_adata_obs.csv", row.names = 1)

In [94]:
sro.a$barcode <- stringr::str_split_i(rownames(sro.a@meta.data), "_", 3)
sro.a$paper.annotations <- obs.a[sro.a$barcode, ]$final_cell_type

In [122]:
table(sro.a$barcode %in% rownames(obs.a))


FALSE  TRUE 
 2768 35641 

In [119]:
sro.e$barcode <- stringr::str_split_i(rownames(sro.e@meta.data), "_", 3)
sro.e$paper.clusters <- obs.e[sro.e$barcode, ]$leiden

In [121]:
table(sro.e$barcode %in% rownames(obs.e))


FALSE  TRUE 
38350 14001 

In [120]:
table(sro.e$paper.clusters, useNA='ifany')


    0     1     2     3     4     5     6     7     9    10    11    12    14 
 2499  2107  1970  1101  1071   915   882   602   523   498   441   374   324 
   15    16    17    18  <NA> 
  312   283   121    57 38271 

## adult and early life

In [78]:
# sro.input <- sro.a
# pref.sro.input <- pref.sro.a
# pref.p.sro.input <- pref.p.sro.a

In [132]:
sro.input <- sro.e
pref.sro.input <- pref.sro.e
pref.p.sro.input <- pref.p.sro.e

In [133]:
s.genes <- rownames(sro.input)[toupper(rownames(sro.input)) %in% toupper(cc.genes.updated.2019$s.genes)]
g2m.genes <- rownames(sro.input)[toupper(rownames(sro.input)) %in% toupper(cc.genes.updated.2019$g2m.genes)]
sro.input <- CellCycleScoring(sro.input, s.features = s.genes, g2m.features = g2m.genes, set.ident = F)
sro.input$Phase <- gsub("G2M", "G2/M", sro.input$Phase)

res.list <- seq(0.2, 2, 0.2)
cell.count <- rowSums(sro.input@assays$RNA@counts > 0)
sro.input <- NormalizeData(sro.input[cell.count > 1, ]) %>%
  FindVariableFeatures(selection.method = "vst", nfeatures = 5000)
sro.input <- ScaleData(sro.input, features = rownames(sro.input)) %>%
  RunPCA(features = rownames(sro.input), npcs = 50) %>%
  FindNeighbors(dims = 1:30, k.param = 30) %>%
  FindClusters(resolution = res.list) %>%
  RunUMAP(dims = 1:30, n.neighbors = 30, metric = "cosine", min.dist = 0.4, spread = 1)

Centering and scaling data matrix

PC_ 1 
Positive:  Tmem176b, Tmem176a, Bhlhe40, S100a4, Fcer1g, Dscam, Kit, Hmgn3, Rora, Arhgef28 
	   Plcb4, Pxdc1, Ern1, Dgat1, Cntn1, Traf1, Cdkn1a, Klrb1b, Mpp7, Ell2 
	   Odc1, Ptms, Cd82, Gem, Maff, Zbtb46, Gadd45b, Fam110a, Ckb, Rasl11a 
Negative:  Rpl12, Cd3d, Inpp4b, Cd2, Gramd3, Satb1, Ablim1, Cd3g, Ms4a4b, Gm2682 
	   Cd3e, Cmah, Gimap6, Arhgap15, Lef1, Trac, Scml4, Themis, Bach2, Pde3b 
	   Rps3, Ptprc, Foxp1, Skap1, S1pr1, Rps20, Rpl13a, Aff3, Cd28, Coro1a 
PC_ 2 
Positive:  Cd79a, Ebf1, Igkc, Ly6d, Ms4a1, Cd79b, Syk, Bank1, Mef2c, Cd74 
	   Napsa, Pax5, Ighm, Fcrla, Blnk, Ifi30, Wdfy4, Iglc3, H2-Ab1, Ctsh 
	   Unc93b1, Iglc2, Pkig, Fcmr, H2-Aa, Mzb1, H2-Eb1, Btk, Ighd, Siglecg 
Negative:  Itk, Emb, Trbc2, Fyb, Camk4, Skap1, Fxyd5, Tnik, Tcf7, Ms4a4b 
	   Ptpn22, Txk, Utrn, Cd3g, Cd3d, Prkch, Il7r, Cd3e, Hcst, Gm2682 
	   Zc3hav1, Ppm1h, Tmsb10, Gm56906, Tox, Vps37b, Lef1, Trac, Ms4a6b, Saraf 
PC_ 3 
Positive:  Rpl41, Ppia, Rpsa, Rps27a, R

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 52351
Number of edges: 2804511

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9611
Number of communities: 18
Elapsed time: 28 seconds


“UNRELIABLE VALUE: One of the ‘future.apply’ iterations (‘future_lapply-1’) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to "ignore".”


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 52351
Number of edges: 2804511

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9395
Number of communities: 24
Elapsed time: 25 seconds


“UNRELIABLE VALUE: One of the ‘future.apply’ iterations (‘future_lapply-2’) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to "ignore".”


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 52351
Number of edges: 2804511

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9192
Number of communities: 26
Elapsed time: 25 seconds


“UNRELIABLE VALUE: One of the ‘future.apply’ iterations (‘future_lapply-3’) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to "ignore".”


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 52351
Number of edges: 2804511

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9036
Number of communities: 33
Elapsed time: 23 seconds


“UNRELIABLE VALUE: One of the ‘future.apply’ iterations (‘future_lapply-4’) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to "ignore".”


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 52351
Number of edges: 2804511

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8927
Number of communities: 37
Elapsed time: 24 seconds


“UNRELIABLE VALUE: One of the ‘future.apply’ iterations (‘future_lapply-5’) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to "ignore".”


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 52351
Number of edges: 2804511

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8824
Number of communities: 40
Elapsed time: 22 seconds


“UNRELIABLE VALUE: One of the ‘future.apply’ iterations (‘future_lapply-6’) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to "ignore".”


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 52351
Number of edges: 2804511

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8738
Number of communities: 41
Elapsed time: 23 seconds


“UNRELIABLE VALUE: One of the ‘future.apply’ iterations (‘future_lapply-7’) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to "ignore".”


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 52351
Number of edges: 2804511

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8647
Number of communities: 43
Elapsed time: 23 seconds


“UNRELIABLE VALUE: One of the ‘future.apply’ iterations (‘future_lapply-8’) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to "ignore".”


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 52351
Number of edges: 2804511

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8574
Number of communities: 46
Elapsed time: 22 seconds


“UNRELIABLE VALUE: One of the ‘future.apply’ iterations (‘future_lapply-9’) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to "ignore".”


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 52351
Number of edges: 2804511

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8500
Number of communities: 46
Elapsed time: 22 seconds


“UNRELIABLE VALUE: One of the ‘future.apply’ iterations (‘future_lapply-10’) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to "ignore".”
14:32:01 UMAP embedding parameters a = 0.7669 b = 1.223

14:32:02 Read 52351 rows and found 30 numeric columns

14:32:02 Using Annoy for neighbor search, n_neighbors = 30

14:32:02 Building Annoy index with metric = cosine, n_trees = 50

0%   10   20   30   40   50   60   70   80   90   100%

[----|----|----|----|----|----|----|----|----|----|

*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
*
|

14:32:06 Writing NN index file to temp file /tmp/RtmpTWUvFU/fil

In [134]:
## save res ####
write.csv(sro.input@reductions$pca@cell.embeddings, file = paste0(pref.sro.input, "PCA.csv"), quote = F)
write.csv(sro.input@reductions$umap@cell.embeddings, file = paste0(pref.sro.input, "UMAP.csv"), quote = F)
write.csv(sro.input@meta.data, file = paste0(pref.sro.input, "meta-data.csv"), quote = F)
saveRDS(sro.input, file = paste0(pref.sro.input, "SRO.rds"))

In [135]:
## cellxgene ####
unimp.expr <- t(as.matrix(sro.input@assays$RNA$data))
unimp.expr.dgC <- as(unimp.expr, "sparseMatrix") 
adata <- AnnData(
  X = unimp.expr.dgC,
  obs = sro.input@meta.data,
  obsm = list(
    X_umap = sro.input@reductions$umap@cell.embeddings
  )
)
write_h5ad(adata, paste0(pref.sro.input, "unimputed-expr.h5ad"))


“sparse->dense coercion: allocating vector of size 10.4 GiB”


## plots

In [3]:
# 2. RNA analysis ####
sro.a <- readRDS(paste0('Seurat/adult/SRO.rds'))

In [4]:
sro.input <- sro.a
pref.sro.input <- pref.sro.a
pref.p.sro.input <- pref.p.sro.a

In [6]:
pdf(paste0(pref.p.sro.input, "QC/violin-paper.annotations-RNA-QC.pdf"), width = 16, height = 6)
plot.all.QC(sro.input, ident = "paper.annotations")
dev.off()

[[1]]

[[2]]

[[3]]

[[4]]

[[5]]


In [136]:
## plots ####
dir.create(paste0(pref.p.sro.input, 'QC/'), recursive = T)

for (res in res.list){
  colname <- paste0("RNA_snn_res.", res)
  pl <- plot.all.QC(sro.input, ident = colname, col = pal$Clusters_long)
  pdf(paste0(pref.p.sro.input, "QC/violin-QC-", colname, ".pdf"), width = 12, height = 6)
  for (i in 1:length(pl)){
    print(pl[[i]])
  }
  dev.off()
}

pdf(paste0(pref.p.sro.input, "QC/violin-sample-RNA-QC.pdf"), width = 8, height = 6)
plot.all.QC(sro.input, ident = "sample")
dev.off()

pdf(paste0(pref.p.sro.input, "QC/UMAP-QC.pdf"), width = 12, height = 10)
plot.continuous.value(sro.input, idx = rownames(sro.input@meta.data), vis = sro.input@reductions$umap@cell.embeddings,
                      val = sro.input$nCount_RNA, val.name='nCount_RNA', point.size=1)
plot.continuous.value(sro.input, idx = rownames(sro.input@meta.data), vis = sro.input@reductions$umap@cell.embeddings,
                      val = sro.input$nFeature_RNA, val.name='nFeature_RNA', point.size=1)
plot.continuous.value(sro.input, idx = rownames(sro.input@meta.data), vis = sro.input@reductions$umap@cell.embeddings,
                      val = sro.input$MtFrac_RNA, val.name='MtFrac_RNA', point.size=1)
plot.continuous.value(sro.input, idx = rownames(sro.input@meta.data), vis = sro.input@reductions$umap@cell.embeddings,
                      val = sro.input$S.Score, val.name='S.Score', point.size=1)
plot.continuous.value(sro.input, idx = rownames(sro.input@meta.data), vis = sro.input@reductions$umap@cell.embeddings,
                      val = sro.input$G2M.Score, val.name='G2M.Score', point.size=1)
dev.off()

[[1]]

[[2]]

[[3]]

[[4]]

[[5]]


In [137]:
pdf(paste0(pref.p.sro.input, "UMAP-Clusters.pdf"), width = 12, height = 10)
for (res in res.list){
  colname <- paste0("RNA_snn_res.", res)
  print(
    plot.clusters(sro.input, groups = sro.input@meta.data[[colname]],
                  clusters.col = colname, col = pal$Clusters_long,
                  label.size = 5, point.size = 0.5,
                  pref.C = T)
  )
}
dev.off()

pdf(paste0(pref.p.sro.input, "UMAP-sample.pdf"), width = 12, height = 10)
plot.clusters(sro.input, groups = sro.input$sample, clusters.col = "sample",
              col = pal$Clusters, label.size = 5, labels = F, point.size = 0.5,
              label.pad = 1, pref.C = F)
dev.off()

pdf(paste0(pref.p.sro.input, "UMAP-age.pdf"), width = 12, height = 10)
plot.clusters(sro.input, groups = sro.input$age, clusters.col = "age",
              col = pal$Clusters, label.size = 5, labels = F, point.size = 0.5,
              label.pad = 1, pref.C = F)
dev.off()

pdf(paste0(pref.p.sro.input, "UMAP-tissue.pdf"), width = 12, height = 10)
plot.clusters(sro.input, groups = sro.input$tissue, clusters.col = "tissue",
              col = pal$Clusters, label.size = 5, labels = F, point.size = 0.5,
              label.pad = 1, pref.C = F)
dev.off()


In [138]:
# for adult
# pdf(paste0(pref.p.sro.input, "UMAP-paper.annotations.pdf"), width = 12, height = 10)
# plot.clusters(sro.input, groups = sro.input$paper.annotations, clusters.col = "paper.annotations",
#               col = pal$Clusters, label.size = 5, labels = T, point.size = 0.5,
#               label.pad = 1, pref.C = F)
# dev.off()


In [139]:
# for child
pdf(paste0(pref.p.sro.input, "UMAP-paper.clusters.pdf"), width = 12, height = 10)
plot.clusters(sro.input, groups = sro.input$paper.clusters, clusters.col = "paper.clusters",
              col = pal$Clusters, label.size = 5, labels = T, point.size = 0.5,
              label.pad = 1, pref.C = F)
dev.off()


# early life

## subcluster

In [12]:
FindSubCluster.custom <- function(sro, sro.subset, master.res, 
                                  newcolname.pref,
                                  subres.list = seq(0.1, 0.5, 0.1)
                                  ){
  cell.count <- rowSums(sro.subset@assays$RNA@counts > 0)
  sro.subset <- NormalizeData(sro.subset[cell.count > 1, ]) %>%
    FindVariableFeatures(selection.method = "vst", nfeatures = 5000)
  
  sro.subset <- ScaleData(sro.subset, features = rownames(sro.subset)) %>%
    RunPCA(features = rownames(sro.subset), npcs = 50) %>%
    FindNeighbors(dims = 1:30, k.param = 30) %>%
    FindClusters(resolution = subres.list) %>%
    RunUMAP(dims = 1:30, n.neighbors = 30, metric = "cosine", min.dist = 0.4, spread = 1)
  
  for (res in subres.list){
    colname.in.subset <- paste0("RNA_snn_res.", res)
    newcolname <- paste0(newcolname.pref, res)
    subclusters <- ifelse(test = is.na(sro.subset@meta.data[colnames(sro), colname.in.subset]),
                          yes = sro.subset@meta.data[colnames(sro), colname.in.subset],
                          no = paste0("sub", sro.subset@meta.data[colnames(sro), colname.in.subset])
    )
    sro@meta.data[[newcolname]] <- coalesce(subclusters, sro@meta.data[[master.res]])
  }
  return(sro)
}


In [13]:
pref.sro.a <- 'Seurat/adult/'; pref.p.sro.a <- 'plots/Seurat/adult/'
pref.sro.e <- 'Seurat/early/'; pref.p.sro.e <- 'plots/Seurat/early/'

sro.e <- readRDS(paste0(pref.sro.e, 'SRO.rds'))

In [14]:
sro.input <- sro.e
pref.sro.input <- pref.sro.e
pref.p.sro.input <- pref.p.sro.e

In [9]:
subres.list <- seq(0.1, 0.5, 0.1)
sro.subset <- subset(sro, RNA_snn_res.0.2 == 7)
sro.input <- FindSubCluster.custom(sro.input, sro.subset, 'RNA_snn_res.0.2', 'res.0.2_C7_subres.', subres.list)

Centering and scaling data matrix

PC_ 1 
Positive:  Gpx1, Rps8, Cfp, Taldo1, Rpl8, Rpl3, Rpl14, Ltb, Rps18, Rpl15 
	   Rps27a, Rpl10a, Rps3, Rpl28, Rps3a1, Rpl13, Rps4x, Rps2, Rps11, Rack1 
	   Med30, Rpl32, Ly6e, Rps6, Rpl6, Rpl7, Rpl31, Rpl35a, Oaz1, Rpl9 
Negative:  Slc4a8, Anxa3, Samsn1, Slc22a23, Mreg, Tmcc3, Cd63, Asap1, Tbc1d8, Slco5a1 
	   Rabgap1l, Arhgap28, Nlrc5, Tnfrsf4, Tmem123, Trio, Ldb2, Aire, Etv3, Cers6 
	   Asprv1, Sema6d, Mxd1, Malat1, Pakap.1, Rcsd1, Slc6a6, Icosl, Zc3h12c, Arhgap22 
PC_ 2 
Positive:  Atp1b1, Tmem176b, Gria3, Cntn1, Tmem176a, Mdk, Plek, Cd80, Dpp10, Tmtc2 
	   Tmsb10, Atp8b4, Sdk1, Zfand6, 2310067P03Rik, Gmfg, Tmem132c, Ptma, Fut8, Ly86 
	   S100a6, Nostrin, Nrg1, Sh2d4a, Cdc42ep3, Tspan13, Pde1c, Nme7, Cd37, Slc8a1 
Negative:  Mpeg1, Cst3, Dnase1l3, Prdm16, Bst2, ENSMUSG00000095041, Lamp1, Epcam, Tyrobp, Slamf7 
	   Irf8, Pbx1, Atf3, Ucp2, H2az1, Cbfa2t3, Itgb1, Creg1, Ly75, Psmb8 
	   Laptm4b, Sqstm1, Mif, Itga4, Lpin1, Osbpl3, Ftl1, Tnni2, Smpd

Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 1942
Number of edges: 106513

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9335
Number of communities: 4
Elapsed time: 0 seconds


“UNRELIABLE VALUE: One of the ‘future.apply’ iterations (‘future_lapply-1’) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to "ignore".”


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 1942
Number of edges: 106513

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.9038
Number of communities: 6
Elapsed time: 0 seconds


“UNRELIABLE VALUE: One of the ‘future.apply’ iterations (‘future_lapply-2’) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to "ignore".”


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 1942
Number of edges: 106513

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8762
Number of communities: 6
Elapsed time: 0 seconds


“UNRELIABLE VALUE: One of the ‘future.apply’ iterations (‘future_lapply-3’) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to "ignore".”


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 1942
Number of edges: 106513

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8497
Number of communities: 6
Elapsed time: 0 seconds


“UNRELIABLE VALUE: One of the ‘future.apply’ iterations (‘future_lapply-4’) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to "ignore".”


Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck

Number of nodes: 1942
Number of edges: 106513

Running Louvain algorithm...
Maximum modularity in 10 random starts: 0.8268
Number of communities: 8
Elapsed time: 0 seconds


“UNRELIABLE VALUE: One of the ‘future.apply’ iterations (‘future_lapply-5’) unexpectedly generated random numbers without declaring so. There is a risk that those random numbers are not statistically sound and the overall results might be invalid. To fix this, specify 'future.seed=TRUE'. This ensures that proper, parallel-safe random numbers are produced via the L'Ecuyer-CMRG method. To disable this check, use 'future.seed = NULL', or set option 'future.rng.onMisuse' to "ignore".”
“The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session”
14:43:19 UMAP embedding parameters a = 0.7669 b = 1.223

14:43:19 Read 1942 rows and found 30 numeric columns

14:43:19 Using Annoy for neighbor search, n_neighbors = 30

14:43:19 Building Annoy index with metric = cosine, n_trees = 50

0%   10 

In [11]:
raster_pdf(paste0(pref.p.sro.input, "UMAP-RNA_snn_res.0.2_subC7.pdf"), width = 12, height = 10, res = 300)
for (subres in subres.list){
    group.name <- paste0("res.0.2_C7_subres.", subres)
    print(plot.clusters(sro.input, groups = sro.input@meta.data[[group.name]], clusters.col = group.name,
              col = pal$Clusters_long, label.size = 5, labels = T,
              point.size = 1, label.pad = 1, pref.C = T))
}

dev.off()

In [18]:
table(sro.input$`res.0.2_C7_subres.0.1`)


    0     1    10    11    12    13    14    15    16    17     2     3     4 
10269  9648   963   664   637   635   498   301   127    86  9471  7443  2238 
    5     6     8     9  sub0  sub1  sub2  sub3 
 2158  2072  1618  1581   842   542   510    48 

In [19]:
cl.to.anno <- c(
    '1' = 'ILC3', 
    '10' = 'ILC3',
    'sub0' = 'eTAC I',
    'sub1' = 'eTAC III',
    'sub2' = 'eTAC II',
    'sub3' = 'Proliferating eTAC'
)

In [23]:
head(sro$res.0.2_C7_subres.0.1)

In [26]:
sro.input$paper.annot <- ifelse(
    sro.input$`res.0.2_C7_subres.0.1` %in% names(cl.to.anno),
    yes = cl.to.anno[sro.input$`res.0.2_C7_subres.0.1`],
    no = 'na'
)

In [28]:
raster_pdf(paste0(pref.p.sro.input, "UMAP-paper.annot.pdf"), width = 12, height = 10, res = 300)
group.name <- "paper.annot"
print(plot.clusters(sro.input, groups = sro.input@meta.data[[group.name]], clusters.col = group.name,
          col = pal$Clusters, label.size = 5, labels = T,
          point.size = 1, label.pad = 1, pref.C = F))
dev.off()

In [29]:
## save res ####
write.csv(sro.input@meta.data, file = paste0(pref.sro.input, "meta-data.csv"), quote = F)
saveRDS(sro.input, file = paste0(pref.sro.input, "SRO.rds"))

## paper annot

In [23]:
cl.to.annot <- c(
  "0" = "LTi",
  "1" = "LTi",
  "2" = "LTi",
  "3" = "LTi",
  "4" = "LTi",
  "5" = "LTi-like ILC",
  "6" = "R-eTAC1",
  "7" = "LTi",
  "9" = "R-eTAC2",
  "10" = "LTi",
  "11" = "R-cDC2",
  "12" = "R-eTAC3",
  "14" = "R-cDC1",
  "15" = "LTi",
  "16" = "R-mDC",
  "17" = "LTi",
  "18" = "LTi"
)


In [17]:
sro.input$Cluster.annot <- sro.input$paper.annot

In [30]:
sro.input$paper.annot <- as.character(cl.to.annot[as.character(sro.input$paper.clusters)])

In [15]:
# for child
pdf(paste0(pref.p.sro.input, "UMAP-paper.clusters.pdf"), width = 12, height = 10)
plot.clusters(sro.input, groups = sro.input$paper.clusters, clusters.col = "paper.clusters",
              col = pal$Clusters, label.size = 5, labels = T, point.size = 0.5,
              label.pad = 1, pref.C = F)
dev.off()


In [18]:
# for child
pdf(paste0(pref.p.sro.input, "UMAP-Cluster.annot.pdf"), width = 12, height = 10)
plot.clusters(sro.input, groups = sro.input$Cluster.annot, clusters.col = "Cluster.annot",
              col = pal$Clusters, label.size = 5, labels = T, point.size = 0.5,
              label.pad = 1, pref.C = F)
dev.off()


In [33]:
# for child
pdf(paste0(pref.p.sro.input, "UMAP-paper.annot.pdf"), width = 12, height = 10)
plot.clusters(sro.input, groups = sro.input$paper.annot, clusters.col = "paper.annot",
              col = pal$Clusters, label.size = 5, labels = T, point.size = 0.5,
              label.pad = 1, pref.C = F)
dev.off()


In [34]:
## save res ####
write.csv(sro.input@meta.data, file = paste0(pref.sro.input, "meta-data.csv"), quote = F)
saveRDS(sro.input, file = paste0(pref.sro.input, "SRO.rds"))