In [None]:
# Requires R with the following packages installed.
vlib = c("tidyverse", "arrow", "coloc", "ggpubr", "locuszoomr",
         "ggsci", "AnnotationHub", "patchwork", "pheatmap")
lapply(vlib, require, character.only = TRUE, quietly = TRUE) |> suppressMessages()


# All relative paths below are resolved against this working directory.
base_dir <- "."
setwd(base_dir)

# Sample sheet: the file has no header row, so the three column names are
# supplied here. `prefix_file` is the "<levels>_<time>_<cluster_name>" stem
# that later cells use to build per-cluster file names.
df_meta <- read_delim(
  "assets/file_meta.txt",
  col_names = c("levels", "time", "cluster_name")
) %>%
  mutate(prefix_file = paste(levels, time, cluster_name, sep = "_"))

In [None]:
# Add one column per GWAS trait holding the path of the coloc summary file
# for every row of `df_meta`. Each path is
# "<trait directory><prefix_file>.coloc_abf.summary.txt.gz".
df_meta_target <- local({
  # Small helper so the shared file suffix is written only once.
  summary_path <- function(trait_dir, prefix) {
    paste0(trait_dir, prefix, ".coloc_abf.summary.txt.gz")
  }

  df_meta %>%
    mutate(
      coloc_asthma = summary_path("assets/fine_mapping/asthma/", prefix_file),
      coloc_uc     = summary_path("assets/fine_mapping/IBD/", prefix_file),
      coloc_cd     = summary_path("assets/fine_mapping/crohn/", prefix_file),
      coloc_blood  = summary_path("assets/fine_mapping/blood_traits/", prefix_file),
      coloc_lc     = summary_path("assets/fine_mapping/LC_gwas/", prefix_file),
      coloc_chip   = summary_path("assets/fine_mapping/CHIP_GWAS/", prefix_file),
      coloc_t1d    = summary_path("assets/fine_mapping/t1d/", prefix_file),
      coloc_sle    = summary_path("assets/fine_mapping/sle/", prefix_file),
      coloc_ra     = summary_path("assets/fine_mapping/ra/", prefix_file)
    )
})


In [None]:
#' Stack coloc summary files and keep the colocalised signals.
#'
#' @param c_fname_coloc_out Character vector of coloc summary files. Each file
#'   is read with `read_delim()`; the stacked table must provide the columns
#'   `prefix`, `PP.H3.abf`, `PP.H4.abf` and `phenotype_id`.
#' @param df_metadata Data frame left-joined onto the stacked summaries
#'   (natural join on the shared column names, after `prefix` has been renamed
#'   to `prefix_file`). The joined table must contain `time` and
#'   `cluster_name`.
#' @param pp_h4_min Rows with `PP.H4.abf` below this value are dropped.
#'   Defaults to 0.6.
#' @return Data frame with the columns `prefix_file`, `time`, `cluster_name`,
#'   `PP.H3.abf`, `PP.H4.abf` and `phenotype_id`.
collapse_coloc = function(c_fname_coloc_out, df_metadata, pp_h4_min = 0.6) {
  df_res = lapply(c_fname_coloc_out, read_delim) %>%
    bind_rows() %>%
    rename(prefix_file = prefix) %>%
    # Join against the metadata that was passed in, not a global object.
    left_join(df_metadata) %>%
    filter(PP.H4.abf >= pp_h4_min) %>%
    select(prefix_file, time, cluster_name, PP.H3.abf, PP.H4.abf, phenotype_id)
  return(df_res)
}

In [None]:
# Trait label -> column of `df_meta_target` holding that trait's summary paths.
c_coloc_columns = c(asthma = "coloc_asthma",
                    uc = "coloc_uc",
                    cd = "coloc_cd",
                    blood = "coloc_blood",
                    lc = "coloc_lc",
                    chip = "coloc_chip",
                    t1d = "coloc_t1d",
                    sle = "coloc_sle",
                    ra = "coloc_ra")

# One filtered coloc table per trait, named by trait. lapply() keeps the names
# of `c_coloc_columns`, so the list holds exactly these nine named elements
# and no empty placeholder slots.
list_coloc = lapply(c_coloc_columns, function(col_name) {
  collapse_coloc(df_meta_target[[col_name]], df_meta_target)
})

In [None]:
# Tab-separated table read from the mashr output directory. Later cells use
# its `phenotype_id` and `variant_id` columns.
df_map_cis <- read_delim(
  file = "02_mashr/joined_all_results.txt.gz",
  delim = "\t"
)

In [None]:
# Heatmap of the number of colocalised signals per eQTL cluster (x) and GWAS
# trait (y), with one facet per time point.
p_coloc = list_coloc %>%
  bind_rows(.id = "trait") %>%
  mutate(trait = factor(trait,
                        # rev(): the first trait listed ends up at the top of the y axis.
                        levels = rev(c("blood", "chip", "asthma", "uc", "cd", "sle", "lc", "ra", "t1d"))),
         # Every time value other than "base" is labelled "after".
         time = ifelse(time == "base", "before", "after"),
         time = factor(time, levels = c("before", "after"))) %>%
  group_by(time, cluster_name, trait) %>%
  # Drop the grouping explicitly so nothing downstream inherits it and
  # summarise() does not emit its regrouping message.
  summarise(n = n(), .groups = "drop") %>%
  ggplot(aes(x = cluster_name, y = trait, fill = n)) +
  geom_tile() +
  geom_text(aes(label = n), color = "grey10") +
  scale_fill_gradient2(low = "white", high = "#B23121", midpoint = 0) +
  theme_pubr(x.text.angle = 90) +
  xlab("eQTL cluster") +
  ylab("GWAS Traits") +
  facet_wrap(~time) +
  theme(axis.title.x = element_text(size = 14),
        axis.text.x = element_text(vjust = 0.5),
        axis.title.y = element_text(size = 14),
        axis.line.x = element_blank(),
        axis.line.y = element_blank(),
        strip.background = element_rect(fill = "white", linewidth = 1),
        strip.text = element_text(size = 12),
        panel.background = element_rect(fill = "white", colour = "black", linewidth = 1))

p_coloc

# Make sure the output directory exists so ggsave() cannot fail on a fresh checkout.
dir.create("assets/figure_prep", recursive = TRUE, showWarnings = FALSE)
ggsave("assets/figure_prep/p_coloc.pdf", plot = p_coloc,
       width = 15, height = 12, dpi = 400, units = "cm")

In [None]:
# One row per colocalised signal, with:
#   prefix_coloc_list - "<trait>-<prefix_file>-<phenotype_id>" label
#   trait_folder      - directory name used for that trait under assets/fine_mapping/
#   candidate_coloc   - path of the per-locus coloc table
df_coloc_meta <- local({
  # Traits whose directory name differs from the trait label; every other
  # trait uses its label unchanged.
  folder_by_trait <- c(
    uc    = "IBD",
    lc    = "LC_gwas",
    cd    = "crohn",
    chip  = "CHIP_GWAS",
    blood = "blood_traits"
  )

  list_coloc %>%
    bind_rows(.id = "trait") %>%
    mutate(
      prefix_coloc_list = paste(trait, prefix_file, phenotype_id, sep = "-"),
      # Lookup returns NA for traits not in the table; coalesce() then falls
      # back to the trait label itself.
      trait_folder = coalesce(unname(folder_by_trait[trait]), trait),
      candidate_coloc = paste0(
        "assets/fine_mapping/", trait_folder, "/coloc_significant/",
        prefix_file, "_", phenotype_id, "_df_coloc.txt.gz"
      )
    )
})

In [None]:
# Read every per-locus coloc table. The list is in the same order as the rows
# of `df_coloc_meta` and is named by `prefix_coloc_list`.
list_candidate_coloc <- setNames(
  lapply(df_coloc_meta$candidate_coloc, read_delim),
  df_coloc_meta$prefix_coloc_list
)

In [None]:
# Reduce the VEP output to a variant lookup table and cache it on disk.
# The first 44 lines of the VEP file are skipped; only columns 1, 2 and 13
# are kept and duplicate rows are removed before writing.
read_delim("assets/fine_mapping/ccre/rsid_anno/all_possible_variants.vep.txt", skip = 44) %>%
  select(1, 2, 13) %>%
  distinct() %>%
  write_delim("assets/fine_mapping/ccre/rsid_anno/all_possible_variants.vep.parsed.txt.gz", delim = "\t")

# Reload the cached table and give the three columns the names used in the
# joins below.
test_variant <- read_delim("assets/fine_mapping/ccre/rsid_anno/all_possible_variants.vep.parsed.txt.gz")
colnames(test_variant) <- c("eqtl_name", "location", "rsid")

In [None]:
# each_pipeline
# Manual walk-through of the per-locus preparation for a single entry of
# `list_candidate_coloc`.
idx = 7
df_candidate = list_candidate_coloc[[idx]] %>%
    # eqtl_name is built before `chr` is shortened, so it keeps the original
    # chr value ("<chr>:<pos>:<A1>:<A2>").
    mutate(eqtl_name = paste(sep = ":", chr, pos, A1, A2)) %>%
    dplyr::select(-rsid, -ALT, -REF) %>%
    # Drop the first three characters of chr (presumably a "chr" prefix - confirm).
    mutate(chr = str_sub(chr, 4, -1)) %>%
    # Bring in the rsid annotation from the VEP lookup table.
    left_join(test_variant) %>%
    # Keep only the first comma-separated identifier.
    separate(rsid, sep = ",", into = c("rsid", "discard"), remove = FALSE) %>%
    dplyr::select(-discard) %>%
    # Blank out identifiers containing "COSV", "-" or "CR".
    mutate(rsid = ifelse(grepl("COSV|-|CR", rsid), NA, rsid)) %>%
    as.data.frame()
# NOTE(review): the gene is hard-coded; confirm it matches
# df_coloc_meta$phenotype_id[idx] for the idx chosen above.
c_eQTL_topSNP = df_map_cis %>% dplyr::filter(phenotype_id == "GSDMB") %>% distinct(variant_id) %>% pull()
# %in% instead of ==: the gene may have several distinct variant ids, and ==
# would recycle the vector against the rows instead of testing membership.
c_index_snp = df_candidate %>% dplyr::filter(eqtl_name %in% c_eQTL_topSNP) %>% pull(rsid)

In [None]:
# Show the candidate rows for the gene's top eQTL variant(s). %in% is used
# because `c_eQTL_topSNP` can hold more than one variant id, in which case ==
# would recycle it against the rows.
df_candidate %>% dplyr::filter(eqtl_name %in% c_eQTL_topSNP)

In [None]:
# Draw, for every colocalised locus, a three-panel figure (GWAS scatter, eQTL
# scatter, gene track) and save it as a PDF named after the locus.

# The annotation record is the same for every locus, so fetch it once here
# rather than on every iteration.
if (!requireNamespace("AnnotationHub", quietly = TRUE)) {
  stop("Package 'AnnotationHub' is required to fetch annotation record AH116291.")
}
ah <- AnnotationHub::AnnotationHub()
ensDb <- ah[["AH116291"]]

# The LDlink API token is read from the environment so that the credential is
# not stored in the notebook. Set it before running, e.g. in ~/.Renviron:
#   LDLINK_TOKEN=<your token>
ldlink_token <- Sys.getenv("LDLINK_TOKEN")

# NOTE(review): the heatmap is saved under "assets/figure_prep/", whereas
# these plots go to "figure_prep/..." - confirm the missing "assets/" prefix
# is intentional.
dir_locus_plots <- "figure_prep/visualization_coloc"
dir.create(dir_locus_plots, recursive = TRUE, showWarnings = FALSE)

# Build a locuszoomr locus around `gene` (+/- 200 kb) using `p_col` as the
# p-value column and `label_col` as the SNP label column.
build_locus <- function(df, p_col, label_col, gene, ens_db) {
  locus(data = df,
        chrom = "chr",
        pos = "pos",
        p = p_col,
        labs = label_col,
        gene = gene, flank = 2e5,
        ens_db = ens_db)
}

for (idx in seq_along(list_candidate_coloc)) {
  # `list_candidate_coloc` was read in the row order of `df_coloc_meta`, so
  # the same index addresses both.
  locus_name <- names(list_candidate_coloc)[idx]
  gene_id <- df_coloc_meta$phenotype_id[idx]

  df_candidate <- list_candidate_coloc[[idx]] %>%
    # eqtl_name is built before `chr` is shortened, so it keeps the original chr value.
    mutate(eqtl_name = paste(sep = ":", chr, pos, A1, A2)) %>%
    dplyr::select(-rsid, -ALT, -REF) %>%
    # Drop the first three characters of chr (presumably a "chr" prefix - confirm).
    mutate(chr = str_sub(chr, 4, -1)) %>%
    left_join(test_variant) %>%
    # Keep only the first comma-separated identifier.
    separate(rsid, sep = ",", into = c("rsid", "discard"), remove = FALSE) %>%
    dplyr::select(-discard) %>%
    # Blank out identifiers containing "COSV", "-" or "CR".
    mutate(rsid = ifelse(grepl("COSV|-|CR", rsid), NA, rsid)) %>%
    as.data.frame()

  # Index SNP: smallest eQTL p-value, ties broken by smallest GWAS p-value.
  c_index_snps <- df_candidate %>%
    top_n(n = 1, -pval_eqtl) %>%
    top_n(n = 1, -pval) %>%
    pull(rsid)

  print(c_index_snps)
  index_snp <- c_index_snps[1]

  # LD can only be looked up when the index SNP has an rsID. Anchoring the
  # pattern avoids matching identifiers that merely contain "rs"; isTRUE()
  # treats a missing index SNP as "no rsID".
  has_rsid <- isTRUE(grepl("^rs", index_snp))
  label_col <- if (has_rsid) "rsid" else "eqtl_name"

  loc_gwas <- build_locus(df_candidate, "pval", label_col, gene_id, ensDb)
  loc_eqtl <- build_locus(df_candidate, "pval_eqtl", label_col, gene_id, ensDb)

  if (has_rsid) {
    if (!nzchar(ldlink_token)) {
      stop("Set the LDLINK_TOKEN environment variable to query LD for ", locus_name, ".")
    }
    loc_gwas <- link_LD(loc_gwas, token = ldlink_token)
    # NOTE(review): LD is copied by row position; this assumes both loci keep
    # the same rows in the same order - confirm.
    loc_eqtl$data$ld <- loc_gwas$data$ld

    p1 <- gg_scatter(loc_gwas, index_snp = index_snp, lab = "index", nudge_x = 0.05)
    p2 <- gg_scatter(loc_eqtl, index_snp = index_snp, lab = "index")
  } else {
    p1 <- gg_scatter(loc_gwas, lab = "index", nudge_x = 0.05)
    p2 <- gg_scatter(loc_eqtl, lab = "index")
  }

  pgene <- gg_genetracks(loc_gwas)
  p_locus <- wrap_plots(p1, p2, pgene, ncol = 1, heights = c(2, 2, 1)) +
    plot_annotation(
      title = locus_name,
      # na.rm = TRUE so a single missing p-value does not turn the reported
      # minimum into NA.
      subtitle = paste0("PP.H4 = ", format(df_coloc_meta$PP.H4.abf[idx], digits = 3),
        ", Min. P_gwas = ", format(min(df_candidate$pval, na.rm = TRUE), digits = 3),
        ", Min. P_eqtl = ", format(min(df_candidate$pval_eqtl, na.rm = TRUE), digits = 3)
      )
    )
  ggsave(file.path(dir_locus_plots, paste0(locus_name, ".pdf")),
         p_locus, width = 18, height = 24, units = "cm", dpi = 300)
}