In [8]:
library(tidyverse)
library(data.table)

# genes from overlap of xQTL and uCoS variants

In [22]:
# Load the three annotated overlap tables (uCoS, CoS, fine-mapping) from the
# current working directory.
overlapped_ucos_sig_xqtl_gene <- fread(
  input = "annotated_overlapped_ucos_sig_xqtl.tsv"
)
overlapped_cos_sig_xqtl_gene <- fread(
  input = "annotated_overlapped_cos_sig_xqtl.tsv"
)
overlapped_finemap_sig_xqtl_gene <- fread(
  input = "annotated_overlapped_finemap_sig_xqtl.tsv"
)


In [14]:
# Preview the first six rows of the fine-mapping overlap table.
head(overlapped_finemap_sig_xqtl_gene, n = 6L)

#chr,start,end,gene_id,strand,gene_name,outcomes,n
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<chr>,<int>
chr2,202773149,202871765,ENSG00000163596,-,ICA1L,image_Aging5,3398
chr2,202874260,203014797,ENSG00000138442,-,WDR12,image_Aging5,386
chr2,202912213,202988262,ENSG00000138380,+,CARF,image_Aging5,2293
chr2,203014878,203226377,ENSG00000144426,+,NBEAL1,image_Aging5,1467
chr3,52836218,52897547,ENSG00000213533,-,STIMATE,image_Aging5,1
chr3,89107620,89482133,ENSG00000044524,+,EPHA3,image_Aging5,129


In [23]:
# Give the CoS and fine-mapping tables a shared `outcomes` column name so they
# can be stacked and joined with the other tables.
overlapped_cos_sig_xqtl_gene <- rename(
  overlapped_cos_sig_xqtl_gene,
  outcomes = colocalized_outcomes
)
overlapped_finemap_sig_xqtl_gene <- rename(
  overlapped_finemap_sig_xqtl_gene,
  outcomes = event_ID.x
)


In [24]:
#' Combine fine-mapping and ColocBoost gene tables and label each gene's source
#'
#' Stacks the two tables with `rbind()`, works out for every value of
#' `gene_col` which of the two inputs it appears in, and attaches that as a
#' `resource_in_FunGen_xQTL` column. Exact duplicate rows are removed.
#'
#' @param finemapping_df Data frame of genes supported by fine-mapping.
#' @param colocboost_df Data frame of genes supported by ColocBoost. Must be
#'   `rbind()`-compatible with `finemapping_df`.
#' @param gene_col Name (single string) of the column identifying a gene;
#'   must be present in both inputs.
#' @return The stacked rows of both inputs (without the temporary `source`
#'   column) plus `resource_in_FunGen_xQTL`, which is one of `"finemapping"`,
#'   `"ColocBoost"` or `"ColocBoost, finemapping"`.
combine_gene_lists <- function(finemapping_df, colocboost_df,
                               gene_col = "gene_name") {
  stopifnot(
    is.data.frame(finemapping_df),
    is.data.frame(colocboost_df),
    is.character(gene_col),
    length(gene_col) == 1L
  )
  inputs <- list(finemapping_df = finemapping_df, colocboost_df = colocboost_df)
  for (input_name in names(inputs)) {
    if (!gene_col %in% names(inputs[[input_name]])) {
      stop(
        "Column '", gene_col, "' not found in ", input_name,
        call. = FALSE
      )
    }
  }

  # rep() rather than a bare scalar: assigning a length-1 value to a zero-row
  # data.frame is an error, and an empty input table is a legitimate case.
  finemapping_df$source <- rep("finemapping", nrow(finemapping_df))
  colocboost_df$source <- rep("ColocBoost", nrow(colocboost_df))

  combined_df <- rbind(finemapping_df, colocboost_df)

  # intersect() against a fixed vector yields the label in one canonical
  # order regardless of which input a gene was encountered in first.
  source_levels <- c("ColocBoost", "finemapping")
  resource_summary <- combined_df |>
    dplyr::group_by(.data[[gene_col]]) |>
    dplyr::summarise(
      resource_in_FunGen_xQTL = paste(
        intersect(source_levels, .data$source),
        collapse = ", "
      ),
      .groups = "drop"
    )

  # Attach the per-gene label to every row, drop the temporary tag, and
  # collapse rows that are now identical.
  combined_df |>
    dplyr::left_join(resource_summary, by = gene_col) |>
    dplyr::select(-"source") |>
    dplyr::distinct()
}

In [34]:
# Stack the CoS and uCoS overlap tables into one ColocBoost table, showing the
# dimensions before and after dropping exact duplicate rows.
cb_overlapped_sig_xqtl_gene <- rbind(
  overlapped_cos_sig_xqtl_gene,
  overlapped_ucos_sig_xqtl_gene
)
dim(cb_overlapped_sig_xqtl_gene)
cb_overlapped_sig_xqtl_gene <- unique(cb_overlapped_sig_xqtl_gene)
dim(cb_overlapped_sig_xqtl_gene)

# Merge with the fine-mapping table, labelling each gene_id by the source(s)
# it was found in; again show dimensions before and after de-duplication.
combined_overlap_sig_xqtl_gene <- combine_gene_lists(
  finemapping_df = overlapped_finemap_sig_xqtl_gene,
  colocboost_df = cb_overlapped_sig_xqtl_gene,
  gene_col = "gene_id"
)
dim(combined_overlap_sig_xqtl_gene)
combined_overlap_sig_xqtl_gene <- unique(combined_overlap_sig_xqtl_gene)
dim(combined_overlap_sig_xqtl_gene)

In [35]:
# Count rows per source label.
table(combined_overlap_sig_xqtl_gene[["resource_in_FunGen_xQTL"]])


             ColocBoost ColocBoost, finemapping             finemapping 
                    265                      83                       3 

In [37]:
# Drop column `n`, then de-duplicate. The same gene/outcome can carry a
# different `n` in different input tables, so earlier de-duplication keeps
# both rows; once `n` is gone they are exact duplicates and must be collapsed
# before the table is written out.
combined_overlap_sig_xqtl_gene <- combined_overlap_sig_xqtl_gene |>
  select(-n) |>
  distinct()

In [41]:
# Tag every row with a constant `data_resources` value of "FunGen_xQTL".
combined_overlap_sig_xqtl_gene$data_resources <- "FunGen_xQTL"

In [44]:
# Write the combined gene table as a tab-separated file.
fwrite(
  x = combined_overlap_sig_xqtl_gene,
  file = "~/project/image_QTL/image_enrichment/updated_image_xQTL_gene.tsv",
  sep = "\t"
)

In [43]:
# Read the saved gene table back in and order it by position, outcome and
# source label.
combined_overlap_sig_xqtl_gene <-
  fread(
    input = "~/project/image_QTL/image_enrichment/updated_image_xQTL_gene.tsv"
  ) |>
  arrange(`#chr`, start, end, outcomes, resource_in_FunGen_xQTL)

In [27]:
# Fine-mapping rows whose (gene_id, outcomes) pair has no match in the
# ColocBoost table.
unique_finemap_gene <- overlapped_finemap_sig_xqtl_gene |>
  anti_join(cb_overlapped_sig_xqtl_gene, by = c("gene_id", "outcomes"))
unique_finemap_gene

#chr,start,end,gene_id,strand,gene_name,outcomes,n
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<chr>,<int>
chr9,89605011,89606554,ENSG00000130222,+,GADD45G,image_Aging2,1
chr20,41136959,41196800,ENSG00000124181,+,PLCG1,image_Aging5,20
chr20,41178447,41317671,ENSG00000174306,-,ZHX3,image_Aging5,25


In [12]:
# uCoS overlap genes that also appear in `T1G`, matched on gene_name.
# NOTE(review): `T1G` is not defined in the visible part of this notebook.
inner_join(overlapped_ucos_sig_xqtl_gene, T1G, by = "gene_name")

#chr,start,end,gene_id,strand,gene_name,outcomes,n,Number,#CHROM,POS,SNV
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<chr>,<int>,<int>,<int>,<int>,<chr>
chr2,202874260,203014797,ENSG00000138442,-,WDR12,image_Aging5,405,7,2,202878716,rs139643391


# genes from overlap of xQTL and CoS variants

In [14]:
# CoS overlap genes that also appear in `T1G`, matched on gene_name.
# NOTE(review): `T1G` is not defined in the visible part of this notebook.
inner_join(overlapped_cos_sig_xqtl_gene, T1G, by = "gene_name")

#chr,start,end,gene_id,strand,gene_name,colocalized_outcomes,n,Number,#CHROM,POS,SNV
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<chr>,<int>,<int>,<int>,<int>,<chr>
chr5,151029944,151093576,ENSG00000145901,-,TNIP1,image_AD2,1,15,5,151052827,rs871269
chr5,151029944,151093576,ENSG00000145901,-,TNIP1,image_Aging2,1,15,5,151052827,rs871269


In [21]:
# ColocBoost rows whose gene_id never occurs in the fine-mapping table,
# followed by the dimensions of that subset.
cb_unique_gene <- cb_overlapped_sig_xqtl_gene |>
  anti_join(overlapped_finemap_sig_xqtl_gene, by = "gene_id")
dim(cb_unique_gene)

ERROR: Error: object 'cb_image_sig_xqtl' not found
