In [None]:
# Attach every package this notebook relies on.
# `require(..., quietly = TRUE)` reports a failed attach only through its
# return value, so the per-package results are collected and any package that
# could not be attached is named in a warning instead of surfacing later as an
# unrelated "could not find function" error.
vlib = c("tidyverse", "ggpubr", "patchwork", "ggsci", "viridis",
         "enrichR", "survival", "survminer", "arrow", "ggpmisc")
vlib_loaded = vapply(
  vlib,
  function(pkg) {
    suppressMessages(require(pkg, character.only = TRUE, quietly = TRUE))
  },
  logical(1)
)
if (!all(vlib_loaded)) {
  warning(
    "Packages that could not be attached: ",
    paste(vlib[!vlib_loaded], collapse = ", "),
    call. = FALSE
  )
}
# Show the attach status of each package as the cell output.
vlib_loaded

# Working directory for the notebook; relative paths used by later cells are
# resolved against it.
base_dir <- "."
setwd(dir = base_dir)

#' Serialize an R object through a `pigz` pipe (parallel gzip compression).
#'
#' @param object R object passed to `saveRDS()`.
#' @param file Output path. `~` is expanded and the path is shell-quoted, so
#'   names containing spaces or shell metacharacters are written correctly.
#' @param threads Number of threads handed to `pigz -p`.
#' @return Invisibly, the value of `close()` on the pipe (the exit status of
#'   the `pigz` command where the platform reports one).
saveRDS.gz <- function(object, file, threads=4) {
  # Quote the target so the shell redirection sees exactly one path.
  cmd <- paste0("pigz -p", threads, " > ", shQuote(path.expand(file)))
  con <- pipe(cmd, "wb")
  # Make sure the pipe is released even when saveRDS() fails part-way.
  closed <- FALSE
  on.exit(if (!closed) close(con), add = TRUE)
  saveRDS(object, file = con)
  closed <- TRUE
  status <- close(con)
  # A non-zero status means the shell command did not finish cleanly.
  if (is.numeric(status) && length(status) == 1L &&
      !is.na(status) && status != 0) {
    warning("pigz exited with status ", status, " while writing ", file,
            call. = FALSE)
  }
  invisible(status)
}
#' Read an RDS file by decompressing it through a `pigz` pipe.
#'
#' @param file Input path. `~` is expanded and the path is shell-quoted, so
#'   names containing spaces or shell metacharacters are read correctly.
#' @param threads Number of threads handed to `pigz -p`; defaults to
#'   `parallel::detectCores()`.
#' @return The deserialized R object.
readRDS.gz <- function(file, threads = parallel::detectCores()) {
  # detectCores() returns NA when the core count is unknown; fall back to a
  # single thread instead of building the invalid flag "-pNA".
  if (length(threads) != 1L || is.na(threads)) {
    threads <- 1L
  }
  cmd <- paste0("pigz -d -c -p", threads, " ", shQuote(path.expand(file)))
  con <- pipe(cmd)
  # Release the pipe whether or not readRDS() succeeds.
  on.exit(close(con), add = TRUE)
  readRDS(file = con)
}

# File metadata table. Column names are supplied here via `col_names`;
# `prefix_file` is the "<levels>_<time>_<cluster_name>" key built from them.
df_meta_target <- mutate(
  read_delim(
    "analysis/assets/file_meta.txt",
    col_names = c("levels", "time", "cluster_name")
  ),
  prefix_file = paste0(levels, "_", time, "_", cluster_name)
)

In [None]:
# Input tables used throughout the notebook; each path is kept in its own
# variable next to the read that consumes it.
path_map_cis <- "mashr_intra_eqtl/joined_all_results.txt.gz"
df_map_cis <- read_delim(path_map_cis, show_col_types = FALSE)

path_adj_mtx <- "scRNA_GRN_scenic/adj.csv"
df_adj_mtx <- read_delim(path_adj_mtx, show_col_types = FALSE)

path_gene_modulescore <- "scRNAseq_downstream_scenic/module_gene_scored.CD8RNA.txt.gz"
df_gene_modulescore <- read_delim(path_gene_modulescore, show_col_types = FALSE)

path_compare_studies <- "compare_studies/unique_eGenes.txt.gz"
df_compare_studies <- read_delim(path_compare_studies, show_col_types = FALSE)

In [None]:
# all_data_prep

# Long-format table with one row per (regulon, column of the RSS file), kept
# only where the value exceeds 0.30. The "(+)" marker is stripped from the
# regulon names so they can be joined against `TF` below.
df_rss <- read_delim("scRNA_GRN_scenic/rss_celltype.txt.gz") |>
  rename(regulon_name = `...1`) |>
  pivot_longer(cols = -regulon_name) |>
  filter(value > 0.30) |>
  mutate(regulon_name = str_remove(regulon_name, "\\(\\+\\)"))

# Attach the adjacency rows of every retained regulon and rank `importance`
# within each (name, regulon) group, scaled by group size.
# The result stays grouped by (name, regulon_name).
df_adj_mtx_all <- df_rss |>
  left_join(df_adj_mtx, by = c("regulon_name" = "TF")) |>
  select(-value) |>
  group_by(name, regulon_name) |>
  mutate(rank_importance = rank(importance) / n())

# For every (phenotype_id, condition) keep the row(s) with the smallest lfsr,
# flag lfsr < 0.05 as "eGene", add the file metadata, and relabel `condition`
# as "treatment_<cluster>" when the text from character 9 onwards contains
# "1st", otherwise "baseline_<cluster>".
df_map_cis_all <- df_map_cis |>
  select(phenotype_id, condition, lfsr, PosteriorMean, PosteriorSD) |>
  group_by(phenotype_id, condition) |>
  top_n(n = 1, wt = -lfsr) |>
  mutate(eGene = ifelse(lfsr < 0.05, "eGene", "not eGene")) |>
  left_join(df_meta_target, by = c("condition" = "prefix_file")) |>
  mutate(
    condition = paste0(
      ifelse(grepl("1st", str_sub(condition, 9, -1)), "treatment_", "baseline_"),
      cluster_name
    )
  )

# Regulon targets matched to the eQTL summary of the same cluster; rows with
# any missing value are dropped.
df_forplot_all <- df_adj_mtx_all |>
  left_join(
    df_map_cis_all,
    by = c("name" = "cluster_name", "target" = "phenotype_id")
  ) |>
  na.omit()

In [None]:
# filtering module
# Selection used by the tables below: cluster, regulon (TF) and module colour.
c_cluster <- c("CD8")
c_regulon <- c("TBX21")
c_module <- c("brown")

# Regulons that passed the RSS threshold for the selected cluster.
c_specific_regulon <- pull(filter(df_rss, name == c_cluster), regulon_name)

# Adjacency rows of the selected TF with importance ranked per TF
# (rank divided by the number of rows of that TF).
df_adj_mtx_filtered <- df_adj_mtx |>
  filter(TF %in% c_regulon) |>
  group_by(TF) |>
  mutate(rank_importance = rank(importance) / n())

# eQTL summary rows whose condition label mentions the selected cluster.
df_map_cis_filtered <- dplyr::filter(
  df_map_cis_all,
  grepl(c_cluster, condition)
)

# Module scores of the selected colour with `value` ranked per colour.
df_gene_modulescore_filtered <- df_gene_modulescore |>
  filter(color %in% c_module) |>
  group_by(color) |>
  mutate(rank_module = rank(value) / n())

# Full join keeps genes that appear in only one of the two tables.
df_joinned_forPlot <- df_adj_mtx_filtered |>
  full_join(df_gene_modulescore_filtered, by = c("target" = "gene_name")) |>
  select(target, TF, importance, rank_importance, color, value, rank_module) |>
  rename(phenotype_id = target)

head(df_joinned_forPlot)

In [None]:
# Rebuild the module ranking without any colour filter (every module is
# ranked within its own colour); this overwrites the objects of the same name
# created earlier in the notebook.
df_gene_modulescore_filtered <- df_gene_modulescore |>
  group_by(color) |>
  mutate(rank_module = rank(value) / n())

# Same join as before, now against the all-module ranking.
df_joinned_forPlot <- df_adj_mtx_filtered |>
  full_join(df_gene_modulescore_filtered, by = c("target" = "gene_name")) |>
  select(target, TF, importance, rank_importance, color, value, rank_module) |>
  rename(phenotype_id = target)


### figure 5A

In [None]:
# Genes from the module/regulon table that have an eQTL summary row for the
# selected cluster; genes without a match (NA condition) are dropped.
df_joined_forPlot_Ours <- df_joinned_forPlot |>
  left_join(df_map_cis_filtered, by = "phenotype_id") |>
  filter(!is.na(condition))

# Panel A: module rank by eGene status, one facet per condition.
# (Alternative y aesthetic that was tried: importance.)
p5a <- ggplot(df_joined_forPlot_Ours, aes(x = eGene, y = rank_module)) +
  facet_wrap(~condition) +
  geom_violin(aes(fill = eGene), width = 0.60, color = "#FFFFFF") +
  geom_boxplot(width = 0.25, alpha = 0.95) +
  # Group-comparison p-value printed inside each facet.
  stat_compare_means(
    mapping = aes(label = paste0("p = ", after_stat(p.format))),
    hjust = -0.5
  ) +
  ggtitle("A") +
  ylab("Rank in module") +
  scale_fill_manual(values = c("#708be9a4", "grey90")) +
  scale_y_continuous(breaks = c(0, 0.5, 1)) +
  theme_pubr(legend = "none") +
  theme(
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 12.5),
    strip.text = element_text(size = 14),
    strip.background = element_rect(fill = NA)
  )


In [None]:
# Locate the OneK1K eQTL tables (one parquet file per cell cluster).
list_onek1k = list.files(
  "correlation_mapping_onek1k/eqtl_table/",
  "eqtl_table.parquet",
  full.names = TRUE)

# Cluster labels are attached to the files purely by position.
# NOTE(review): this assumes the order returned by list.files() matches the
# order of the labels below -- confirm against the directory contents.
c_onek1k_clusters = c("CD4_TEM",
                      "CD4_Naive",
                      "CD4_SOX4",
                      "CD8_TEM",
                      "CD8_Naive_TCM",
                      "CD8_S100B_T",
                      "Classic_Mono",
                      "DC",
                      "Memory_B",
                      "Naive_B",
                      "NK",
                      "NK_recruiting",
                      "Nonclassic_Mono",
                      "Plasma")

# Fail early with a clear message when the number of files and labels differ;
# otherwise extra files would end up with NA names and clusters could be
# mislabelled without any error.
if (length(list_onek1k) != length(c_onek1k_clusters)) {
  stop(
    "Expected ", length(c_onek1k_clusters),
    " OneK1K eQTL tables but found ", length(list_onek1k),
    "; cluster names are assigned by position.",
    call. = FALSE
  )
}

list_onek1k = lapply(list_onek1k, read_parquet)
names(list_onek1k) = c_onek1k_clusters

# Stack all clusters and keep one row per (GENE, cluster).
# distinct() keeps the first row it encounters for each pair.
df_onek1k = list_onek1k %>%
  bind_rows(.id = "cluster") %>%
  #filter(FDR < 0.05) %>%
  ungroup() %>%
  distinct(GENE, cluster, .keep_all = TRUE)

# Label genes by FDR < 0.05 and rename columns to match the other tables.
df_onek1k = df_onek1k %>%
  mutate(onek1k_significance = ifelse(FDR < 0.05, "eGene", "non-eGene")) %>%
  select(GENE, cluster, onek1k_significance, FDR) %>%
  dplyr::rename(phenotype_id = GENE, onek1k_fdr = FDR, onek1k_cluster = cluster)

# Subset to clusters whose label contains "CD8".
df_onek1k_cd8 = df_onek1k %>%
  filter(grepl("CD8", onek1k_cluster))

In [None]:
# Module/regulon genes matched to the OneK1K CD8 clusters; genes absent from
# OneK1K are dropped and the significance label is exposed as `eGene`, the
# column name the plotting code uses.
df_joined_forPlot_onek1k <- df_joinned_forPlot |>
  left_join(df_onek1k_cd8, by = "phenotype_id") |>
  filter(!is.na(onek1k_significance)) |>
  rename(eGene = onek1k_significance)
#df_joined_forPlot_onek1k

### figure 5B

In [None]:
# Notebook display size for the figure below.
options(repr.plot.height = 4, repr.plot.width = 9, repr.plot.res = 200)

# Module rank by OneK1K eGene status, one facet per OneK1K CD8 cluster.
# (Alternative y aesthetic that was tried: importance.)
p5_11k <- ggplot(df_joined_forPlot_onek1k, aes(x = eGene, y = rank_module)) +
  facet_wrap(~onek1k_cluster) +
  geom_violin(aes(fill = eGene), width = 0.60, color = "#FFFFFF") +
  geom_boxplot(width = 0.25, alpha = 0.95) +
  # Group-comparison p-value printed inside each facet.
  stat_compare_means(
    mapping = aes(label = paste0("P = ", after_stat(p.format))),
    hjust = -0.5
  ) +
  ggtitle("C") +
  ylab("Rank in module") +
  scale_fill_manual(values = c("#708be9a4", "grey90")) +
  scale_y_continuous(breaks = c(0, 0.5, 1)) +
  theme_pubr(legend = "none") +
  theme(
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 12.5),
    strip.text = element_text(size = 14),
    strip.background = element_rect(fill = NA)
  )

# Write the panel to disk.
ggsave(
  filename = "figure_prep/pRank_onek1k_eQTLeffect_estimate.pdf",
  plot = p5_11k,
  width = 8.5,
  height = 3.75
)

### figure 5C

In [None]:
# Notebook display size for the figure below.
options(repr.plot.height = 4, repr.plot.width = 6, repr.plot.res = 200)
# Effect-size panel: log2(|PosteriorMean|) against module rank, shown as
# jittered points and boxplots per `rank_decile` with a linear trend line,
# one facet per condition.
# NOTE(review): `df_betaDistribution_forPlot_eGene` is not created in the
# visible part of this notebook; it must already exist in the session and
# provide rank_decile, rank_module, PosteriorMean and condition.
pbeta = df_betaDistribution_forPlot_eGene %>%
  mutate(rank_decile = as.factor(rank_decile)) %>%
  # Base mapping uses the continuous rank scaled by 10; the point and box
  # layers below override x with the discrete decile factor.
  ggplot(aes(x = rank_module * 10, y = log2(abs(PosteriorMean)))) +
  # Disabled layer: background points for the non-eGene table.
  #geom_jitter(
  #  data = df_betaDistribution_forPlot_noteGene,
  #  aes(x = rank_decile),
  #  color = "grey60", shape = 16, alpha = 0.4
#  ) +
  # Fitted-equation and p-value label (ggpmisc).
  # NOTE(review): the formula puts x / 10 on the left-hand side and y on the
  # right, whereas geom_smooth(method = "lm") below is given no formula --
  # confirm the label and the drawn line describe the same regression.
  stat_poly_eq(
    formula = (x) / 10 ~ y,
    aes(
      label = paste(
        after_stat(eq.label), "P ", format(p.value, 3),
        sep = "*`,`~"
      )
    ),
    vjust = 0.1,
    parse = TRUE
  ) +
  # NOTE(review): `color` is both mapped in aes() and set as a constant here;
  # the constant presumably wins, so no colour legend entry is expected --
  # confirm.
  geom_jitter(
    aes(x = rank_decile, color = "eGene"),
    color = "#385ee68f", shape = 16
  ) +
  # Explicit discrete x scale for the decile factor.
    scale_x_discrete() +
  # Boxplots per decile; outlier markers are hidden (outlier.shape = NA)
  # because the jitter layer already draws every point.
  geom_boxplot(
    aes(x = as.factor(rank_decile)),
    outlier.shape = NA,
    width = 0.75,
    alpha = 0.5,
    linewidth = 0.5
  ) +
  # Linear trend on the continuous rank; the + 0.5 offset is presumably there
  # to line the trend up with the decile positions -- TODO confirm.
  geom_smooth(
    aes(x = as.numeric(rank_module) * 10 + 0.5, y =  log2(abs(PosteriorMean))),
    method = "lm",
    se = FALSE,
    color = "#ca2f2f",
    linewidth = 0.75,
    alpha = 0.8
  ) +
  facet_wrap(~condition, nrow = 1) +
  xlab("Decile of module rank") +
  ylab("log2 of Absolute effect size") +
  theme_pubr(legend = "top") +
  theme(
    axis.title = element_text(size = 15),
    axis.text = element_text(size = 13),
    strip.text = element_text(size = 14),
    strip.background = element_rect(fill = NA)
  )


### figure 5D

In [None]:
# Re-read the eQTL results (this replaces any `df_map_cis` already in the
# session) and collect the unique genes with lfsr < 0.05 in a condition whose
# name mentions the selected cluster.
# NOTE(review): this path has no directory prefix, unlike the
# "mashr_intra_eqtl/" path used for the same file name earlier in the
# notebook -- confirm which location is intended.
df_map_cis <- read_delim("joined_all_results.txt.gz")
c_cluster <- "CD8"
df_eGene <- filter(df_map_cis, lfsr < 0.05)
c_eGene <- df_eGene |>
  filter(grepl(c_cluster, condition)) |>
  distinct(phenotype_id) |>
  pull(phenotype_id)

In [None]:
# Packages needed only for the enrichment analysis.
library(fgsea)
library(msigdbr)

# Per-gene module scores for the CD8 analysis.
df_CD8_module_scored <- read_delim("module_gene_scored.CD8.RNA.txt")

# Human MSigDB category "H" gene sets, reshaped into a named list
# (gene-set name -> character vector of gene symbols) for fgsea().
m_df <- msigdbr(species = "Homo sapiens", category = "H")
fgsea_sets <- split(x = m_df$gene_symbol, f = m_df$gs_name)


In [None]:
# Named score vector (gene_name -> value) for brown-module genes that are
# also eGenes.
res <- df_CD8_module_scored |>
  filter(color == "brown", gene_name %in% c_eGene) |>
  select(gene_name, value) |>
  deframe()

# Enrichment test with scoreType = "pos".
# NOTE(review): no RNG seed is set before this call; if fgsea's estimates are
# stochastic, the table and figure may differ slightly between runs -- confirm.
df_gsea_res_CD8_egene <- fgsea(
  pathways = fgsea_sets,
  stats = res,
  scoreType = "pos"
)

# Dot plot of the ten pathways with the smallest p-values. The first nine
# characters of each pathway name are dropped (presumably the "HALLMARK_"
# prefix) and underscores become spaces; pathways with padj > 0.05 get an NA
# colour value and are drawn in grey.
pgsea_3 <- df_gsea_res_CD8_egene |>
  arrange(padj) |>
  top_n(n = 10, wt = -pval) |>
  mutate(
    pathway = str_replace_all(str_sub(pathway, 10, -1), "_", " "),
    name = "CD8",
    # Reverse the factor levels so the first row is drawn at the top of the
    # y axis.
    pathway = factor(pathway, levels = rev(unique(pathway))),
    color_size = ifelse(padj > 0.05, NA, size)
  ) |>
  ggplot(aes(x = NES, size = -log10(padj), y = pathway, color = color_size)) +
  geom_point() +
  scale_color_gradient2(
    low = "#BCE4D8",
    mid = "#3E9AB4",
    high = "#2C5985",
    midpoint = 20,
    na.value = "grey70"
  ) +
  theme_pubr(legend = "right") +
  facet_wrap(~name) +
  theme(
    strip.background = element_rect(fill = NA, linewidth = 1),
    strip.text = element_text(size = 13)
  ) +
  scale_x_continuous(limits = c(1, 2.5), breaks = c(1, 2.0, 2.5))

# Write the figure to disk.
ggsave(
  filename = "figure_prep/p_DME_eQTLsubset_module.pdf",
  plot = pgsea_3,
  width = 16,
  height = 8,
  units = "cm",
  dpi = 300
)

# Show the table behind the plot.
df_gsea_res_CD8_egene |>
  arrange(padj) |>
  top_n(n = 10, wt = -pval)