In [16]:
setwd("~/0-workspace/CCR7_DC/oral-tolerance-Gardner/")

suppressPackageStartupMessages({
library(Seurat) # v4.4
library(Matrix)
library(future)
library(dplyr)
library(ggplot2)
library(cowplot)
library(ComplexHeatmap)
library(circlize)
library(ggrepel)
})
set.seed(1)
options(future.globals.maxSize = Inf)

pal <- readRDS("plots/palette.rds")
source("../utils.R")

In [17]:
cols <- rev(RColorBrewer::brewer.pal(11,"Spectral"))
transitions <- c(0, 15, 25, 30, 40, 50, 60, 70, 75, 85, 100)
scaled_transitions <- scales::rescale(transitions, from = c(0, 100), to = c(0, 1))
scale.color <- scale_color_gradientn(colors = cols, values = scaled_transitions)

In [18]:
RenameGenesSeurat <- function(obj, newnames) { # Replace gene names in different slots of a Seurat object. Run this before integration. It only changes obj@assays$RNA@counts, @data and @scale.data.
  print("Run this before integration. It only changes obj@assays$RNA@counts, @data and @scale.data.")
  RNA <- obj@assays$RNA
  
  if (nrow(RNA) == length(newnames)) {
    if (length(RNA@counts)) RNA@counts@Dimnames[[1]] <- newnames
    if (length(RNA@data)) RNA@data@Dimnames[[1]] <- newnames
    if (length(RNA@scale.data)) rownames(RNA@scale.data) <- newnames
    if (length(RNA@meta.features)) {
      RNA@meta.features$prev.names <- rownames(RNA@meta.features)
      rownames(RNA@meta.features) <- newnames
    }
    if (length(RNA@var.features)){
      RNA@var.features <- rownames(RNA@meta.features[match(RNA@var.features, RNA@meta.features$prev.names), ])
    }
  } else {"Unequal gene sets: nrow(RNA) != nrow(newnames)"}
  obj@assays$RNA <- RNA
  return(obj)
}

In [19]:
select.markers <- function(fn, output.dir = 'results/markers/', pairwise = F, fc.thr = 1.5, apv.thr = 0.01, n = Inf, exCC = F, cc.genes = NULL){
  markers <- read.csv(file = paste0(output.dir, fn, ".csv"), header = T, stringsAsFactors = F) %>%
    subset(!grepl("(^MT-)|(^RPS)|(^RPL)|(^MRPL)|(^MRPS)", toupper(gene)))
  if (exCC){
    markers <- markers[!(toupper(markers$gene) %in% toupper(cc.genes)),]
  }
  if(pairwise){
    markers <- subset(markers, abs(avg_log2FC) > log2(fc.thr) & p_val_adj < apv.thr)
    markers$cluster <- ifelse(markers$avg_log2FC > 0, "up", "down")
    if(!is.infinite(n)) markers <- markers %>% group_by(cluster) %>% top_n(n, abs(avg_log2FC))
    markers <- markers[order(markers$avg_log2FC), ]
  } else{
    markers <- subset(markers, avg_log2FC > log2(fc.thr) & p_val_adj < apv.thr)
    if(any(duplicated(markers$gene))) markers <- markers %>% group_by(gene) %>% top_n(1, avg_log2FC)
    if(!is.infinite(n)) markers <- markers %>% group_by(cluster) %>% top_n(n, avg_log2FC)
    markers <- markers[order(markers$cluster, -markers$avg_log2FC), ]
  }
  return(markers)
}

# Load SRO

In [5]:
# load SRO ####
sro.ln <- readRDS("../TC_all_LN/results/SRO_subset.rds")

In [None]:
# pref.sro.a <- 'Seurat/adult/'; pref.p.sro.a <- 'plots/Seurat/adult/'
# sro.a <- readRDS(paste0(pref.sro.a, "SRO.rds"))

pref.sro.e <- 'Seurat/early/'; pref.p.sro.e <- 'plots/Seurat/early/'
sro.e <- readRDS(paste0(pref.sro.e, "SRO.rds"))

In [23]:
# pref.sro <- pref.sro.a
# pref.p.sro <- pref.p.sro.a
# sro <- sro.a

In [39]:
pref.sro <- pref.sro.e
pref.p.sro <- pref.p.sro.e
sro <- sro.e

In [40]:
sro <- RenameGenesSeurat(sro, toupper(rownames(sro)))

[1] "Run this before integration. It only changes obj@assays$RNA@counts, @data and @scale.data."


## TC module scores

In [26]:
# load markers ####
markers.ln <- select.markers("TC-clusters", output.dir = paste0('../TC_all_LN/results/markers/'))

In [27]:
tc1.markers.ln <- markers.ln[markers.ln$cluster == 3,]
tc2.markers.ln <- markers.ln[markers.ln$cluster == 2,]
tc3.markers.ln <- markers.ln[markers.ln$cluster == 1,]
tc4.markers.ln <- markers.ln[markers.ln$cluster == 5,]
ki_tc.markers.ln <- markers.ln[markers.ln$cluster == 7,]
transitional.markers.ln <- markers.ln[markers.ln$cluster == 4,]

In [28]:
sum(toupper(tc1.markers.ln$gene) %in% rownames(sro))
sum(toupper(tc2.markers.ln$gene) %in% rownames(sro))
sum(toupper(tc3.markers.ln$gene) %in% rownames(sro))
sum(toupper(tc4.markers.ln$gene) %in% rownames(sro))
sum(toupper(ki_tc.markers.ln$gene) %in% rownames(sro))
sum(toupper(transitional.markers.ln$gene) %in% rownames(sro))

In [29]:
sro <- AddModuleScore(sro, list(toupper(transitional.markers.ln$gene)), name = "transitional.markers")
sro <- AddModuleScore(sro, list(toupper(ki_tc.markers.ln$gene)), name = "ki_tc.markers")
sro <- AddModuleScore(sro, list(toupper(tc1.markers.ln$gene)), name = "tc1.markers")
sro <- AddModuleScore(sro, list(toupper(tc2.markers.ln$gene)), name = "tc2.markers")
sro <- AddModuleScore(sro, list(toupper(tc3.markers.ln$gene)), name = "tc3.markers")
sro <- AddModuleScore(sro, list(toupper(tc4.markers.ln$gene)), name = "tc4.markers")

“The following features are not present in the object: SKP1A, ATP5F1, not searching for symbol synonyms”
“The following features are not present in the object: HIST1H1B, H2AFX, HIST1H2AE, H2AFV, HIST1H1E, HIST1H4D, HIST1H2AP, HIST1H2AF, HIST1H1D, HIST1H1A, DDX39, HIST1H3C, HIST1H4I, not searching for symbol synonyms”
“The following features are not present in the object: GM42601, GM45716, not searching for symbol synonyms”
“The following features are not present in the object: SEPT4, SEPT11, not searching for symbol synonyms”
“The following features are not present in the object: 2610528A11RIK, AC149090.1, H2AFZ, not searching for symbol synonyms”
“The following features are not present in the object: HIST1H2BC, FAM49A, 1810058I24RIK, not searching for symbol synonyms”


In [30]:
pref.p.sro

In [31]:
dir.create(paste0(pref.p.sro, "module-scores"))

“'plots/Seurat/adult/module-scores' already exists”


In [32]:
f.w <- 12; f.h <- 10

In [33]:
pdf(paste0(pref.p.sro, "module-scores/UMAP-transitional_markers-from-TC_all_LN.pdf"), width = f.w, height = f.h)
print(plot.continuous.value(
    sro, scale.color = scale.color,
    vis = sro@reductions$umap@cell.embeddings,
    idx = rownames(sro@meta.data), point.size = 1,
    val = get.named.vector.sro(sro, paste0("transitional.markers1")), val.name = 'module\nscore') + ggtitle("transitional.markers"))
dev.off()

pdf(paste0(pref.p.sro, "module-scores/UMAP-TCI_markers-from-TC_all_LN.pdf"), width = f.w, height = f.h)
print(plot.continuous.value(
    sro, scale.color = scale.color,
    vis = sro@reductions$umap@cell.embeddings,
    idx = rownames(sro@meta.data), point.size = 1,
    val = get.named.vector.sro(sro, paste0("tc1.markers1")), val.name = 'module\nscore') + ggtitle("TC I markers"))
dev.off()

pdf(paste0(pref.p.sro, "module-scores/UMAP-TCII_markers-from-TC_all_LN.pdf"), width = f.w, height = f.h)
print(plot.continuous.value(
    sro, scale.color = scale.color,
    vis = sro@reductions$umap@cell.embeddings,
    idx = rownames(sro@meta.data), point.size = 1,
    val = get.named.vector.sro(sro, paste0("tc2.markers1")), val.name = 'module\nscore') + ggtitle("TC II markers"))
dev.off()

pdf(paste0(pref.p.sro, "module-scores/UMAP-TCIII_markers-from-TC_all_LN.pdf"), width = f.w, height = f.h)
print(plot.continuous.value(
    sro, scale.color = scale.color,
    vis = sro@reductions$umap@cell.embeddings,
    idx = rownames(sro@meta.data), point.size = 1,
    val = get.named.vector.sro(sro, paste0("tc3.markers1")), val.name = 'module\nscore') + ggtitle("TC III markers"))
dev.off()

pdf(paste0(pref.p.sro, "module-scores/UMAP-TCIV_markers-from-TC_all_LN.pdf"), width = f.w, height = f.h)
print(plot.continuous.value(
    sro, scale.color = scale.color,
    vis = sro@reductions$umap@cell.embeddings,
    idx = rownames(sro@meta.data), point.size = 1,
    val = get.named.vector.sro(sro, paste0("tc4.markers1")), val.name = 'module\nscore') + ggtitle("TC IV markers"))
dev.off()

pdf(paste0(pref.p.sro, "module-scores/UMAP-KI_TC_markers-from-TC_all_LN.pdf"), width = f.w, height = f.h)
print(plot.continuous.value(
    sro, scale.color = scale.color,
    vis = sro@reductions$umap@cell.embeddings,
    idx = rownames(sro@meta.data), point.size = 1,
    val = get.named.vector.sro(sro, paste0("ki_tc.markers1")), val.name = 'module\nscore') + ggtitle("Ki67+TC markers"))
dev.off()

In [34]:
module.scores <- sro@meta.data[grep("markers1", colnames(sro@meta.data), value = T)]

In [35]:
dir.create(paste0(pref.sro, "module-scores/"))

“'Seurat/adult/module-scores' already exists”


In [36]:
write.csv(module.scores, paste0(pref.sro, "module-scores/TC_all_LN-scores.csv"), quote = F)

## eTAC signatures from paper

In [43]:
# load markers ####
etac.1 <- readxl::read_excel('Gardner-eTAC-signatures.xlsx', sheet = 1) %>% subset(lfc_mean > log2(1.5))
colnames(etac.1)[1] <- 'gene'
etac.2 <- readxl::read_excel('Gardner-eTAC-signatures.xlsx', sheet = 2) %>% subset(lfc_mean > log2(1.5))
colnames(etac.2)[1] <- 'gene'
etac.3 <- readxl::read_excel('Gardner-eTAC-signatures.xlsx', sheet = 3) %>% subset(lfc_mean > log2(1.5))
colnames(etac.3)[1] <- 'gene'
etac.prolif <- readxl::read_excel('Gardner-eTAC-signatures.xlsx', sheet = 7) %>% subset(lfc_mean > log2(1.5))
colnames(etac.prolif)[1] <- 'gene'
etac.lti <- readxl::read_excel('Gardner-eTAC-signatures.xlsx', sheet = 8) %>% subset(lfc_mean > log2(1.5))
colnames(etac.lti)[1] <- 'gene'

[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`
[1m[22mNew names:
[36m•[39m `` -> `...1`


In [44]:
sum(toupper(etac.1$gene) %in% rownames(sro))
sum(toupper(etac.2$gene) %in% rownames(sro))
sum(toupper(etac.3$gene) %in% rownames(sro))
sum(toupper(etac.prolif$gene) %in% rownames(sro))
sum(toupper(etac.lti$gene) %in% rownames(sro))

In [45]:
sro <- AddModuleScore(sro, list(toupper(etac.1$gene)), name = "etac1.markers")
sro <- AddModuleScore(sro, list(toupper(etac.2$gene)), name = "etac2.markers")
sro <- AddModuleScore(sro, list(toupper(etac.3$gene)), name = "etac3.markers")
sro <- AddModuleScore(sro, list(toupper(etac.prolif$gene)), name = "prolif.etac.markers")
sro <- AddModuleScore(sro, list(toupper(etac.lti$gene)), name = "lti.etac.markers")

“The following features are not present in the object: LY6F, FCER1A, MUP5, PAKAP-1, not searching for symbol synonyms”
“The following features are not present in the object: LY6F, not searching for symbol synonyms”
“The following features are not present in the object: LY6F, MUP5, WFDC13, PAKAP-1, not searching for symbol synonyms”
“The following features are not present in the object: FCER1A, not searching for symbol synonyms”
“The following features are not present in the object: LY6F, MUP5, FCER1A, not searching for symbol synonyms”


In [46]:
pref.p.sro

In [47]:
dir.create(paste0(pref.p.sro, "module-scores"))

“'plots/Seurat/early/module-scores' already exists”


In [48]:
f.w <- 12; f.h <- 10

In [49]:
pdf(paste0(pref.p.sro, "module-scores/UMAP-etac1.markers.pdf"), width = f.w, height = f.h)
print(plot.continuous.value(
    sro, scale.color = scale.color,
    vis = sro@reductions$umap@cell.embeddings,
    idx = rownames(sro@meta.data), point.size = 1,
    val = get.named.vector.sro(sro, paste0("etac1.markers1")), val.name = 'module\nscore') + ggtitle("eTAC 1 markers"))
dev.off()

pdf(paste0(pref.p.sro, "module-scores/UMAP-etac2.markers.pdf"), width = f.w, height = f.h)
print(plot.continuous.value(
    sro, scale.color = scale.color,
    vis = sro@reductions$umap@cell.embeddings,
    idx = rownames(sro@meta.data), point.size = 1,
    val = get.named.vector.sro(sro, paste0("etac2.markers1")), val.name = 'module\nscore') + ggtitle("eTAC 2 markers"))
dev.off()

pdf(paste0(pref.p.sro, "module-scores/UMAP-etac3.markers.pdf"), width = f.w, height = f.h)
print(plot.continuous.value(
    sro, scale.color = scale.color,
    vis = sro@reductions$umap@cell.embeddings,
    idx = rownames(sro@meta.data), point.size = 1,
    val = get.named.vector.sro(sro, paste0("etac3.markers1")), val.name = 'module\nscore') + ggtitle("eTAC 3 markers"))
dev.off()

pdf(paste0(pref.p.sro, "module-scores/UMAP-prolif.etac.markers.pdf"), width = f.w, height = f.h)
print(plot.continuous.value(
    sro, scale.color = scale.color,
    vis = sro@reductions$umap@cell.embeddings,
    idx = rownames(sro@meta.data), point.size = 1,
    val = get.named.vector.sro(sro, paste0("prolif.etac.markers1")), val.name = 'module\nscore') + ggtitle("Proliferating eTAC markers"))
dev.off()

pdf(paste0(pref.p.sro, "module-scores/UMAP-lti.etac.markers.pdf"), width = f.w, height = f.h)
print(plot.continuous.value(
    sro, scale.color = scale.color,
    vis = sro@reductions$umap@cell.embeddings,
    idx = rownames(sro@meta.data), point.size = 1,
    val = get.named.vector.sro(sro, paste0("lti.etac.markers1")), val.name = 'module\nscore') + ggtitle("LTi-like eTAC markers"))
dev.off()

In [None]:
module.scores <- sro@meta.data[grep("markers1", colnames(sro@meta.data), value = T)]

In [53]:
colnames(module.scores)

In [61]:
pl <- lapply(colnames(module.scores), function(module){
    module.name <- stringr::str_split_i(module, '.marker', i = 1)
    p <- plot.continuous.value(
        sro, scale.color = scale.color,
        vis = sro@reductions$umap@cell.embeddings,
        idx = rownames(sro@meta.data), point.size = 1,
        val = get.named.vector.sro(sro, paste0(module)), val.name = 'module\nscore') + ggtitle(module.name)
    return(p)
})

In [62]:
numcol <- 3
numrow <- ceiling(length(pl)/numcol)
pdf(paste0(pref.p.sro, "module-scores/UMAP-eTAC.markers.pdf"), width = 8*numcol, height = 6*numrow)
print(plot_grid(plotlist = pl, align = 'hv', ncol = 3))
dev.off()

In [63]:
module.scores <- sro@meta.data[grep("markers1", colnames(sro@meta.data), value = T)]

In [64]:
dir.create(paste0(pref.sro, "module-scores/"))

“'Seurat/early/module-scores' already exists”


In [65]:
write.csv(module.scores, paste0(pref.sro, "module-scores/eTAC-scores.csv"), quote = F)