In [None]:
# Packages required by this notebook.
# `library()` is used instead of `require()` so that a missing package stops
# the run here with a clear error, rather than returning FALSE and failing
# later at the first call into that package.
vlib <- c("tidyverse", "arrow", "ggVennDiagram", "ggpubr")
invisible(suppressMessages(
  lapply(vlib, library, character.only = TRUE, quietly = TRUE)
))

# All relative paths used below are resolved against this directory.
base_dir <- "."
setwd(base_dir)

# File metadata table. The input has no header row, so the three columns are
# named here; `prefix_file` joins them with underscores.
df_meta <- read_delim("assets/file_meta.txt",
                      col_names = c("levels", "time", "cluster_name")) %>%
  mutate(prefix_file = paste0(levels, "_", time, "_", cluster_name))

## figure 3A

In [None]:
# Figure 3A: heatmap of shared proportions, clusters of this study on the x
# axis against the 1M-bloodNL labels on the y axis, one panel per time point.
df_shared_proportion <- read_delim(
  "assets/compare_studies/shareproportions.txt.gz",
  delim = "\t"
)

# Stack columns 2-9 into `name` / `value` pairs and fix the display order of
# both axes and of the facets. Rows that end up with an NA in any column
# (e.g. a `name` outside the listed levels) are dropped by na.omit().
df_shared_long <- df_shared_proportion %>%
  pivot_longer(cols = 2:9) %>%
  mutate(
    name = factor(name, levels = c("B", "CD4T", "CD8T", "DC", "monocyte", "NK")),
    cluster_name = factor(cluster_name, levels = c("B", "CD4", "CD8", "DC", "Mono", "NK")),
    time = factor(
      ifelse(time == "base", "baseline", "treatment"),
      levels = c("baseline", "treatment")
    )
  ) %>%
  na.omit()

p1 <- ggplot(df_shared_long, aes(x = cluster_name, y = name, fill = value)) +
  geom_tile() +
  # Diverging palette centred on a shared proportion of 0.45.
  scale_fill_gradient2(low = "#2345CA", mid = "#F7E7CA", high = "#CA3423", midpoint = 0.45) +
  facet_wrap(~time) +
  labs(x = "Our study", y = "label from 1M-bloodNL study") +
  theme_pubr(legend = "bottom", x.text.angle = 45) +
  theme(
    axis.title.x = element_text(size = 14),
    axis.title.y = element_text(size = 14, vjust = 0.5),
    # Axis lines are removed; the panel border drawn below frames the tiles.
    axis.line.x = element_blank(),
    axis.line.y = element_blank(),
    strip.background = element_rect(fill = "white", linewidth = 1),
    strip.text = element_text(size = 12),
    legend.key.height = unit(0.4, "cm"),
    legend.key.width = unit(1.4, "cm"),
    panel.background = element_rect(fill = "white", colour = "black", linewidth = 1)
  )
p1

# The plot is passed explicitly rather than relying on last_plot().
ggsave(
  "figure_prep/p_shared_proportion_with_sceqtlgen.pdf",
  plot = p1,
  width = 18, height = 12, units = "cm", dpi = 600
)

## figure 3B

In [None]:
# Figure 3B: Venn diagram of eGenes detected in this study, in GTEx whole
# blood and in OneK1K.
df_gtex_wholeblood <- read_delim("assets/compare_studies/gtex/whole_blood_significant.txt.gz")
df_map_cis <- read_delim("assets/02_mashr/joined_all_results.txt.gz")
df_onek1k <- read_delim("assets/compare_studies/OneK1K/eGene_list_OneK1K.txt.gz")

# One row per significant gene (phenotype_id) for each study, tagged with the
# study name. All three pipelines de-duplicate the same way; the GTEx one
# previously repeated `select()` instead of calling `distinct()`, so its gene
# list could contain repeated IDs.
df_map_cis_eGenes <- df_map_cis %>%
  filter(lfsr < 0.05) %>%
  distinct(phenotype_id) %>%
  mutate(group = "ours")
df_gtex_wholeblood_eGenes <- df_gtex_wholeblood %>%
  filter(gtex_significance == TRUE) %>%
  distinct(phenotype_id) %>%
  mutate(group = "gtex")
df_onek1k_eGenes <- df_onek1k %>%
  filter(onek1k_significance == TRUE) %>%
  distinct(phenotype_id) %>%
  mutate(group = "onek1k")


# Preview of the gene-by-study membership table (1 = eGene in that study,
# NA = not). Displayed only; nothing is assigned.
rbind(df_map_cis_eGenes, df_gtex_wholeblood_eGenes, df_onek1k_eGenes) %>%
  group_by(phenotype_id, group) %>%
  distinct() %>%
  mutate(value = 1) %>%
  pivot_wider(names_from = group) %>% head()

# Named list of gene-ID vectors, one element per study, as input for the Venn
# diagram.
x <- list(ours = df_map_cis_eGenes$phenotype_id,
          gtex = df_gtex_wholeblood_eGenes$phenotype_id,
          onek1k = df_onek1k_eGenes$phenotype_id)

library(ggVennDiagram)

p <- ggVennDiagram::ggVennDiagram(x, label_alpha = 0) +
  scale_fill_gradient(low = "#FFFFFF", high = "#4981BF")
p
p %>% ggsave("./figure_prep/pCompare_studies.pdf", ., width = 10, height = 10, dpi = 300, units = "cm")

## figure 3C

In [None]:
# Figure 3C input: eGenes reported by this study only, i.e. present in the
# "ours" set and absent from both the GTEx and the OneK1K sets.
df_all_eGenes <- rbind(
  df_map_cis_eGenes,
  df_gtex_wholeblood_eGenes,
  df_onek1k_eGenes
)

unique_gene_list <- df_all_eGenes %>%
  group_by(phenotype_id, group) %>%
  distinct() %>%
  mutate(value = 1) %>%
  # One column per study; a gene missing from a study gets 0 instead of NA.
  pivot_wider(names_from = group, values_from = value, values_fill = 0) %>%
  filter(ours == 1, gtex == 0, onek1k == 0) %>%
  pull(phenotype_id)

# Enrichment of the study-specific genes against the selected Enrichr
# libraries; `test_df` is what enrichr() returns for these libraries.
library(enrichR)
dbs <- c(
  "GO_Biological_Process_2023",
  "GO_Cellular_Component_2023",
  "GO_Molecular_Function_2023",
  "KEGG_2021_Human",
  "MSigDB_Hallmark_2020",
  "Reactome_2022"
)
test_df <- enrichr(unique_gene_list, dbs)

In [None]:
# Figure 3C: bar chart of the top KEGG pathways enriched among the eGenes
# found only in this study.
p_GO_uniqueGenes <- test_df %>%
  # Stack the per-library result tables, recording the library name in `db`.
  bind_rows(.id = "db") %>%
  # The pipe after this filter was previously missing, which ended the
  # assignment here and left the rest of the chain as a separate expression.
  filter(db == "KEGG_2021_Human") %>%
  group_by(db) %>%
  # Ten highest combined scores per library (ties are kept).
  slice_max(Combined.Score, n = 10) %>%
  ungroup() %>%
  arrange(Adjusted.P.value) %>%
  # Reverse the level order so the smallest adjusted p-value is drawn at the top.
  mutate(Term = factor(Term, levels = rev(unique(Term)))) %>%
  ggplot(aes(x = -log10(Adjusted.P.value), y = Term)) +
  geom_col() +
  # Reference line at adjusted p = 0.05.
  geom_vline(xintercept = -log10(0.05)) +
  xlab("-log 10 of Adjusted p-value") +
  ylab("KEGG Pathway") +
  theme_pubr()


options(repr.plot.width = 18, repr.plot.height = 12, repr.plot.res = 150)
p_GO_uniqueGenes

ggsave("./figure_prep/pCompare_studies_GO.pdf", p_GO_uniqueGenes, width = 20, height = 10, dpi = 300, units = "cm")

## supplementary figure 2

In [None]:
# Supplementary figure 2: distribution of the mashr shared proportion per
# label, shown as a violin with the raw points and a narrow box plot on top.
options(repr.plot.width = 5, repr.plot.height = 5, repr.plot.res = 200)

# The file has no header row, so readr names the columns X1, X2, ...;
# X1 is plotted as the shared proportion and X2 as the label.
read_delim(
  "correlation_mapping_onek1k/sharedproportions.parsed.txt",
  col_names = FALSE
) %>%
  ggplot(aes(x = X2, y = X1)) +
  geom_violin() +
  theme_pubr() +
  geom_point() +
  # Outliers are hidden on the box plot because every point is already drawn.
  geom_boxplot(outlier.shape = NA, alpha = 0.5, width = 0.25) +
  labs(x = "label", y = "shared proportion from mashr") +
  scale_y_continuous(limits = c(0, 1), breaks = c(0, 0.5, 1)) +
  # Group-comparison p-value label, centred horizontally.
  stat_compare_means(label.x.npc = "middle", hjust = 0.5)