## projection of WGCNA

Some parts of this script require the scRNA-seq (Seurat) object of each dataset.

In [1]:
# Packages used by this notebook. "Azimuth" is listed because RunAzimuth() is
# called in the preprocessing cells below; the duplicated "ggrastr" entry is
# collapsed by unique().
vlib <- unique(c(
  "tidyverse", "ggpubr", "patchwork", "tidyseurat", "ggrastr", "ggsci",
  "future.apply", "arrow", "pheatmap", "ggrastr", "Seurat", "hdWGCNA",
  "Azimuth"
))

# Attach every package quietly, but remember which ones failed: require()
# returns FALSE instead of raising an error, so failures would otherwise only
# surface much later as "could not find function" errors.
loaded <- vapply(
  vlib,
  function(pkg) {
    suppressWarnings(suppressMessages(
      require(pkg, character.only = TRUE, quietly = TRUE)
    ))
  },
  logical(1)
)

# Warn rather than stop so that cells which do not need the missing package
# can still be run.
if (!all(loaded)) {
  warning(
    "Could not attach package(s): ",
    paste(vlib[!loaded], collapse = ", "),
    call. = FALSE
  )
}

# Project root; the relative paths used in this notebook are resolved from here.
base_dir = "."

# Make the project root the working directory (a no-op while base_dir is ".").
setwd(base_dir)

#' Serialize an R object to a gzip-compressed RDS file through `pigz`.
#'
#' @param object R object to serialize.
#' @param file Output path. `~` is expanded and the path is shell-quoted, so
#'   paths containing spaces or shell metacharacters are written correctly.
#' @param threads Number of compression threads handed to `pigz -p`.
#' @return `NULL`, invisibly; called for the side effect of writing `file`.
saveRDS.gz <- function(object, file, threads=4) {
  # path.expand() keeps "~" working now that the path is quoted for the shell.
  cmd <- paste0("pigz -p", threads, " > ", shQuote(path.expand(file)))
  con <- pipe(cmd, "wb")
  # Close the pipe even when saveRDS() fails, so the connection is not leaked.
  on.exit(close(con), add = TRUE)
  saveRDS(object, file = con)
  invisible(NULL)
}
#' Read a gzip-compressed RDS file, decompressing it through `pigz`.
#'
#' @param file Input path. `~` is expanded and the path is shell-quoted, so
#'   paths containing spaces or shell metacharacters are read correctly.
#' @param threads Number of threads handed to `pigz -p`; falls back to 1 when
#'   the value is `NA` (which `parallel::detectCores()` may return).
#' @return The deserialized R object.
readRDS.gz <- function(file, threads = parallel::detectCores()) {
  # An NA thread count would otherwise be pasted into the command as "-pNA".
  if (length(threads) != 1L || is.na(threads)) {
    threads <- 1L
  }
  cmd <- paste0("pigz -d -c -p", threads, " ", shQuote(path.expand(file)))
  con <- pipe(cmd, "rb")
  # Close the pipe even when readRDS() fails, so the connection is not leaked.
  on.exit(close(con), add = TRUE)
  readRDS(file = con)
}


In [None]:
## shared object

# Reference object; it is passed as `seurat_ref` (hdWGCNA experiment "wgcna")
# to every ProjectModules() call below.
seurat_obj.ref <- readRDS.gz(file = "seurat_obj/seurat_obj_wgcna_CD8.RNA.RDS")

## preprocessing each dataset (needs seurat object)

### 1. aida

For the replication analysis, please see 04_04_WGCNA_replication.ipynb.

In [None]:
# Load the AIDA query object and annotate it with Azimuth ("pbmcref"); the
# resulting predicted.celltype.l1 labels are used below to pick CD8 T cells.
seurat_obj.aida <- readRDS.gz("seurat_obj/seurat_obj.aida.v5.RDS")
seurat_obj.aida <- RunAzimuth(seurat_obj.aida, reference = "pbmcref")

# Project the reference hdWGCNA modules onto the RNA assay of the query.
DefaultAssay(seurat_obj.aida) <- "RNA"
seurat_obj.aida <- ProjectModules(
  seurat_obj = seurat_obj.aida,
  seurat_ref = seurat_obj.ref,
  # vars.to.regress = c(), # optionally regress covariates when running ScaleData
  group.by.vars = "orig.ident", # column in seurat_query to run harmony on
  wgcna_name_proj = "projection", # name of the new hdWGCNA experiment in the query dataset
  wgcna_name = "wgcna" # name of the hdWGCNA experiment in the ref dataset
)

# Module connectivity restricted to the cells Azimuth labelled as CD8 T.
seurat_obj.aida <- ModuleConnectivity(
  seurat_obj.aida,
  group.by = "predicted.celltype.l1", group_name = "CD8 T"
)

# Per-module expression scores, computed with UCell.
seurat_obj.aida <- ModuleExprScore(
  seurat_obj.aida,
  method = "UCell"
)

# Module table as returned by GetModules().
projected_hMEs <- GetModules(seurat_obj.aida)

# write_delim() defaults to a single space as delimiter; write tabs explicitly
# so the table matches the TNBC/HNSCC exports and can be read back with
# delim = "\t".
projected_hMEs %>%
  write_delim("assets/projected_aida.module_kME.txt.gz", delim = "\t")
seurat_obj.aida %>%
  saveRDS.gz("seurat_obj/seurat_obj.aida.v5.azimuth.wgcna.RDS")

# NOTE(review): df_meta_cd8_aida and df_meta_cd8_ref are not created in the
# visible part of this notebook -- confirm they are defined before this cell.

# AIDA table read back from disk, restricted to the cell IDs listed in
# df_meta_cd8_aida and tagged with its dataset label.
df_ME.aida <- read_delim(
  "scRNAseq_downstream_scenic/projected_aida.module_kME.txt.gz",
  delim = "\t"
) |>
  filter(cellID %in% df_meta_cd8_aida$cellID) |>
  mutate(data = "AIDA_CTRL")

# Module eigengenes of the reference object; cell IDs are moved from the row
# names into a `cellID` column so both tables share the same key.
df_ME.ref <- GetMEs(seurat_obj.ref) |>
  as.data.frame() |>
  rownames_to_column("cellID") |>
  filter(cellID %in% df_meta_cd8_ref$cellID) |>
  mutate(data = "LungCancer")

### 2. TNBC dataset

In [None]:
# Build a Seurat object from the GSE169246 10x matrix.
expression_matrix <- Read10X(data.dir = "GSE169246_TNBC", gene.column = 1)
seurat_obj <- CreateSeuratObject(counts = expression_matrix)

# Metadata created by Seurat, with the cell IDs moved into a `cellID` column.
df_md_original <- rownames_to_column(
  as.data.frame(seurat_obj@meta.data),
  "cellID"
)

# Metadata file shipped with the dataset; its nCount_RNA / nFeature_RNA
# columns are dropped so they do not clash with the ones Seurat computed.
df_md <- read_delim("GSE169246_TNBC/GSE169246_meta.txt.gz", delim = "\t") |>
  select(-nCount_RNA, -nFeature_RNA) |>
  as.data.frame()

# Keep only cells present in both tables, then subset the object to those
# cells and replace its metadata with the joined table.
df_md <- inner_join(df_md_original, df_md, by = "cellID")
seurat_obj <- subset(seurat_obj, cells = df_md$cellID)
seurat_obj@meta.data <- column_to_rownames(as.data.frame(df_md), "cellID")

# CD8 T cell types.
df_md |>
  filter(grepl("CD8", celltype_l2)) |>
  distinct(celltype_l2)

# Standard preprocessing on the RNA assay, one step per statement.
DefaultAssay(seurat_obj) <- "RNA"
seurat_obj <- NormalizeData(seurat_obj)
seurat_obj <- FindVariableFeatures(seurat_obj)
seurat_obj <- ScaleData(seurat_obj)
seurat_obj <- RunPCA(seurat_obj)

# Clustering on the PCA embedding, stored as "unintegrated_clusters".
seurat_obj <- FindNeighbors(seurat_obj, reduction = "pca", dims = 1:30)
seurat_obj <- FindClusters(
  seurat_obj,
  resolution = 0.4,
  cluster.name = "unintegrated_clusters"
)

# Azimuth annotation against the "pbmcref" reference.
seurat_obj <- RunAzimuth(seurat_obj, reference = "pbmcref")

# Split the RNA assay by sample, then integrate the layers with Harmony; the
# integrated embedding is stored under the reduction name "harmony".
seurat_obj[["RNA"]] <- split(seurat_obj[["RNA"]], f = seurat_obj$sample)
seurat_obj <- IntegrateLayers(
  object = seurat_obj,
  method = HarmonyIntegration,
  orig.reduction = "pca",
  new.reduction = "harmony",
  verbose = FALSE
)

# Project the reference modules (experiment "wgcna") onto the TNBC object as
# a new experiment called "projected"; `sample` is the column handed to
# harmony via group.by.vars.
seurat_obj <- ProjectModules(
  seurat_obj = seurat_obj,
  seurat_ref = seurat_obj.ref,
  group.by.vars = "sample",
  wgcna_name_proj = "projected",
  wgcna_name = "wgcna"
)

# Module connectivity within the cells Azimuth labelled as CD8 T.
seurat_obj <- ModuleConnectivity(
  seurat_obj,
  group.by = "predicted.celltype.l1",
  group_name = "CD8 T"
)

# Per-module expression scores via UCell.
seurat_obj <- ModuleExprScore(seurat_obj, method = "UCell")

# Export the module table (tab-delimited) and the processed object.
projected_hMEs <- GetModules(seurat_obj)
write_delim(
  projected_hMEs,
  "seurat_obj/GSE169246_TNBC/projected_hMEs.txt.gz",
  delim = "\t"
)
saveRDS.gz(
  seurat_obj,
  "seurat_obj/GSE169246_TNBC/seurat_obj_raw_azimuth_GSE169246.RDS"
)

# The code below refers to the TNBC object as `seurat_obj_GSE169246`, but the
# preceding steps build and save it as `seurat_obj`. Alias it here unless an
# object of that name already exists in the session.
# NOTE(review): this assumes the cells are run top to bottom, i.e. that
# `seurat_obj` still holds the TNBC object at this point.
if (!exists("seurat_obj_GSE169246")) {
  seurat_obj_GSE169246 <- seurat_obj
}

# Overview of the tissue / response / time / regimen combinations present.
seurat_obj_GSE169246@meta.data %>% distinct(tissue, response, time, regimen)

# Pre-treatment blood cells that Azimuth labelled as CD8 T.
df_meta_cd8_GSE169246 <- seurat_obj_GSE169246@meta.data %>%
  rownames_to_column("cellID") %>%
  filter(predicted.celltype.l1 == "CD8 T", tissue == "blood", time == "Pre-treatment")

# Module eigengenes for those cells, tagged with the dataset label.
df_ME.GSE169246 <- GetMEs(seurat_obj_GSE169246) %>%
  as.data.frame() %>%
  rownames_to_column("cellID") %>%
  filter(cellID %in% df_meta_cd8_GSE169246$cellID) %>%
  mutate(data = "TNBC")

### 3. HNSCC dataset

In [None]:
# Load the merged GSE200996 object and run the standard preprocessing chain.
seurat_obj <- readRDS.gz("GSE200996/seurat_obj.merged.RDS")
seurat_obj <- seurat_obj %>%
  NormalizeData() %>%
  FindVariableFeatures() %>%
  ScaleData() %>%
  RunPCA()

# Harmony integration of the layers; the result is stored as "harmony".
seurat_obj <- IntegrateLayers(
  object = seurat_obj, method = HarmonyIntegration,
  orig.reduction = "pca", new.reduction = "harmony",
  verbose = FALSE
)

# Build the neighbour graph on the Harmony embedding. The clusters are stored
# as "harmony_integrated" and the UMAP below also uses "harmony", so using the
# unintegrated "pca" reduction here would contradict the cluster name.
seurat_obj <- FindNeighbors(seurat_obj, dims = 1:30, reduction = "harmony")
seurat_obj <- FindClusters(seurat_obj, resolution = 0.4, cluster.name = "harmony_integrated")
seurat_obj <- RunUMAP(seurat_obj, reduction = "harmony", dims = 1:30, reduction.name = "umap.harmony")

# Re-join the layers, then annotate with Azimuth ("pbmcref").
seurat_obj <- JoinLayers(seurat_obj)
seurat_obj <- RunAzimuth(seurat_obj, reference = "pbmcref")

# Project the reference modules (experiment "wgcna") onto the HNSCC object as
# a new experiment called "projected"; `sample_id` is the column handed to
# harmony via group.by.vars.
seurat_obj <- ProjectModules(
  seurat_obj = seurat_obj,
  seurat_ref = seurat_obj.ref,
  group.by.vars = "sample_id",
  wgcna_name_proj = "projected",
  wgcna_name = "wgcna"
)

# Module connectivity within the cells Azimuth labelled as CD8 T.
seurat_obj <- ModuleConnectivity(
  seurat_obj,
  group.by = "predicted.celltype.l1",
  group_name = "CD8 T"
)

# Per-module expression scores via UCell.
seurat_obj <- ModuleExprScore(seurat_obj, method = "UCell")

# Export the module table (tab-delimited) and the processed object.
projected_hMEs <- GetModules(seurat_obj)
write_delim(
  projected_hMEs,
  "seurat_obj/GSE200996/projected_hMEs.txt.gz",
  delim = "\t"
)
# NOTE(review): the file name has no "." before "RDS" ("seurat_obj.mergedRDS");
# it looks like a typo but is kept as-is in case other scripts read this path.
saveRDS.gz(seurat_obj, "seurat_obj/GSE200996/seurat_obj.mergedRDS")

# Supplementary metadata for GSE200996 (delimiter is guessed by read_delim).
df_meta_add.GSE200996 <- read_delim(
  "scRNA_wgcna_replication/GSE200996/supplementary_metadata.txt"
)

# Baseline cells labelled CD8 T by Azimuth, with the supplementary metadata
# attached. No `by` is given, so the join uses every column the two tables
# have in common.
df_meta_cd8_GSE200996 <- seurat_obj@meta.data |>
  rownames_to_column("cellID") |>
  left_join(df_meta_add.GSE200996) |>
  filter(predicted.celltype.l1 == "CD8 T", time == "baseline")

# Module eigengenes for those cells, tagged with the dataset label.
df_ME.GSE200996 <- GetMEs(seurat_obj) |>
  as.data.frame() |>
  rownames_to_column("cellID") |>
  filter(cellID %in% df_meta_cd8_GSE200996$cellID) |>
  mutate(data = "HNSCC")

### TME analysis

In [None]:
# Load the annotated TME object and work on its RNA assay.
seurat_obj.tme <- readRDS.gz(
  "tme_analysis/seurat_obj_sct_harmony_immuneOnly_anno_241028.RDS"
)
DefaultAssay(seurat_obj.tme) <- "RNA"

# Project the reference modules (experiment "wgcna") as a new experiment
# called "wgcna_projection"; `ID` is the column handed to harmony.
seurat_obj.tme <- ProjectModules(
  seurat_obj = seurat_obj.tme,
  seurat_ref = seurat_obj.ref,
  group.by.vars = "ID",
  wgcna_name_proj = "wgcna_projection",
  wgcna_name = "wgcna"
)

# Module connectivity within the cells annotated as CD8_T in anno_l1.
seurat_obj.tme <- ModuleConnectivity(
  seurat_obj.tme,
  group.by = "anno_l1",
  group_name = "CD8_T"
)

# Per-module expression scores via UCell.
seurat_obj.tme <- ModuleExprScore(seurat_obj.tme, method = "UCell")

projected_hMEs <- GetModules(seurat_obj.tme)

# Rebuild the metadata: drop the existing anno_l1 / anno_l2 columns, convert
# harmony_clusters1 to numeric, and join the cluster annotation table on the
# columns the two tables share.
# NOTE(review): new_md is not written back to seurat_obj.tme in this cell.
df_anno <- read_delim("tme_analysis/cluster_anno_harmony_clusters1_NKTonly.txt")
new_md <- seurat_obj.tme@meta.data |>
  rownames_to_column("CellID") |>
  select(-anno_l1, -anno_l2) |>
  mutate(harmony_clusters1 = as.numeric(as.character(harmony_clusters1))) |>
  left_join(df_anno) |>
  column_to_rownames("CellID")

## SF6C

In [None]:
# Stack the four per-dataset tables (left-to-right rbind), order the datasets
# for the x axis, and flag which values come from projection versus the
# original lung-cancer analysis.
p1 <- Reduce(
  rbind,
  list(df_ME.aida, df_ME.ref, df_ME.GSE169246, df_ME.GSE200996)
) |>
  mutate(
    data = factor(data, levels = c("AIDA_CTRL", "TNBC", "HNSCC", "LungCancer")),
    projected = ifelse(data == "LungCancer", "our results", "projected")
  ) |>
  ggplot(aes(x = data, y = brown)) +
  geom_violin(width = 0.85) +
  geom_boxplot(width = 0.30, alpha = 0.5, outlier.shape = NA) +
  labs(x = "Dataset", y = "Brown module expression in CD8 ") +
  facet_grid(~projected, scales = "free_x", space = "free") +
  theme_pubr() +
  theme(
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13)
  )

# Notebook display size, then show the plot.
options(repr.plot.width = 5, repr.plot.height = 4, repr.plot.res = 300)
p1

# Save the panel as PDF.
ggsave(
  "figure_prep/supplementary/sf6C_compareAIDA_CD8.pdf",
  p1,
  height = 4, width = 5, dpi = 300
)



## SF6B

In [None]:
# Brown-module values with the CD8 metadata attached (join on shared columns).
df_pairwise_test <- df_ME.GSE200996 |>
  left_join(df_meta_cd8_GSE200996)

# Pairwise Wilcoxon tests between Patho_Bins groups, Bonferroni-adjusted and
# without continuity correction.
pairwise.wilcox.test(
  df_pairwise_test$brown,
  df_pairwise_test$Patho_Bins,
  p.adjust.method = "bonferroni",
  correct = FALSE
)

In [None]:
# Notebook display size for this panel.
options(repr.plot.width = 4, repr.plot.height = 4, repr.plot.res = 300)

# Brown module by pathological-response bin; rows whose Patho_Bins is NA after
# the factor conversion (missing or not High/Medium/Low) are dropped.
p_sf6_hnscc <- df_ME.GSE200996 |>
  left_join(df_meta_cd8_GSE200996) |>
  mutate(Patho_Bins = factor(Patho_Bins, levels = c("High", "Medium", "Low"))) |>
  filter(!is.na(Patho_Bins)) |>
  ggplot(aes(x = Patho_Bins, y = brown)) +
  geom_violin(width = 0.85) +
  geom_boxplot(width = 0.30, alpha = 0.5, outlier.shape = NA) +
  stat_compare_means() +
  labs(x = "pathological response", y = "GSE200996, CD8 Brown Module") +
  theme_pubr()

# Save the panel as PDF.
ggsave(
  "figure_prep/supplementary/sf6_wgcna_projection_hgcna.pdf",
  p_sf6_hnscc,
  height = 4, width = 4, dpi = 300
)

## SF6D

In [None]:
# Stacked proportion bars of CD8 subtypes per dataset. The first "_" in each
# subtype label is replaced by a space, and the "CD8 Proliferating" subtype is
# excluded.
p2 <- read_delim("assets/cell_proportion.projected_sets.txt.gz", delim = "\t") |>
  mutate(CD8_subtype = str_replace(CD8_subtype, "_", " ")) |>
  filter(CD8_subtype != "CD8 Proliferating") |>
  ggplot(aes(x = data, y = n, fill = CD8_subtype)) +
  geom_bar(stat = "identity", position = "fill") +
  theme_pubr(x.text.angle = 45, legend = "right") +
  scale_fill_simpsons()

# Notebook display size.
options(repr.plot.width = 4, repr.plot.height = 4, repr.plot.res = 300)

# Save the panel as PDF.
ggsave(
  "figure_prep/supplementary/sf6_tem_proportion.pdf",
  p2,
  height = 4, width = 4, dpi = 300
)

### figure 6D

In [None]:
# Notebook display size for this panel.
options(repr.plot.width = 3, repr.plot.height = 4, repr.plot.res = 200)

# NOTE(review): `df_ME` is not created in the visible part of this notebook,
# and `df_md` was last assigned in the TNBC section -- confirm both hold the
# TME tables (with anno_l1 and Binary_response) before running this cell.

# Brown module in CD8_T cells, responders versus non-responders.
df_ME |>
  left_join(df_md) |>
  filter(anno_l1 == "CD8_T") |>
  mutate(
    Binary_response = factor(
      Binary_response,
      levels = c("Responder", "Non-responder")
    )
  ) |>
  ggplot(aes(x = anno_l1, y = brown, color = Binary_response)) +
  geom_violin(aes(fill = Binary_response), width = 0.80, linewidth = 0) +
  geom_boxplot(
    width = 0.30,
    fill = "white",
    alpha = 0.80,
    outlier.shape = NA,
    position = position_dodge(width = 0.80)
  ) +
  scale_fill_manual(values = c("#afafaf", "#a3260f")) +
  scale_color_manual(values = c("grey30", "grey30")) +
  stat_compare_means() +
  theme_pubr()

### figure 6E

In [None]:
# NOTE(review): `df_ME` is not created in the visible part of this notebook,
# and `df_md` was last assigned in the TNBC section -- confirm both hold the
# TME tables (with anno_l1, anno_l2 and Binary_response) before running.

# Brown module in CD8_T cells split by anno_l2, responders versus
# non-responders; the comparison is shown as a significance label.
p1 <- df_ME |>
  left_join(df_md) |>
  filter(anno_l1 == "CD8_T") |>
  mutate(
    Binary_response = factor(
      Binary_response,
      levels = c("Responder", "Non-responder")
    )
  ) |>
  ggplot(aes(x = anno_l2, y = brown, color = Binary_response)) +
  geom_violin(aes(fill = Binary_response), width = 0.80, linewidth = 0) +
  geom_boxplot(
    width = 0.30,
    fill = "white",
    alpha = 0.80,
    outlier.shape = NA,
    position = position_dodge(width = 0.80)
  ) +
  scale_fill_manual(values = c("#afafaf", "#a3260f")) +
  scale_color_manual(values = c("grey30", "grey30")) +
  stat_compare_means(aes(label = after_stat(p.signif))) +
  theme_pubr()