In [4]:
# Append a constant column n = 1 to the gene-name table and overwrite the
# same TSV with the result (tab-separated).
a <- fread("human_gene_name.tsv")
a$n <- 1
fwrite(x = a, file = "human_gene_name.tsv", sep = "\t")

In [None]:
# Invoke `sos run` on gene_annotation.ipynb with the annotate_coord workflow,
# passing human_gene_name.tsv as the phenotype file, gene_name as the ID
# column, and the GRCh38.103 collapsed gene GTF as the coordinate annotation.
sos run xqtl-protocol/code/data_preprocessing/phenotype/gene_annotation.ipynb annotate_coord \
    --cwd ./ \
    --phenoFile human_gene_name.tsv \
    --phenotype-id-column gene_name \
    --molecular-trait-type gene \
    --coordinate-annotation data/resource/references/Homo_sapiens.GRCh38.103.chr.reformatted.collapse_only.gene.ERCC.gtf

In [1]:
library(tidyverse)
library(data.table)

-- Attaching core tidyverse packages ------------------------ tidyverse 2.0.0 --
v dplyr     1.1.4     v readr     2.1.5
v forcats   1.0.0     v stringr   1.5.1
v ggplot2   3.5.2     v tibble    3.2.1
v lubridate 1.9.4     v tidyr     1.3.1
v purrr     1.0.4     
-- Conflicts ------------------------------------------ tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
i Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors

Attaching package: 'data.table'


The following objects are masked from 'package:lubridate':

    hour, isoweek, mday, minute, month, quarter, second, wday, week,
    yday, year

In [2]:
# Make the home directory the working directory so the relative paths used in
# later cells (e.g. "project/...") resolve against it; getwd() echoes the
# resulting directory.
setwd("~/")
getwd()

## sQTL

In [34]:
# Load the xQTL top-loci manifest, keep only its sQTL entries, and report
# the size of what is left.
meta_file <- fread("project/image_QTL/DNE_cb_explained_by_xQTL/xQTL_top_loci.tsv") |>
  filter(Data_Type == "sQTL")
dim(meta_file)

In [35]:
# Assuming your meta_file dataframe is already loaded
# If not, you'd need to recreate it from your data

# Load one top-loci table and tag every row with the manifest entry it came from.
#
# path                        - file handed to read_tsv()
# cohort, data_type, modality - labels stored in the Cohort, Data_Type and
#                               Modality columns; also joined with "_" into
#                               the Cohort_Data_Type_Modality column
# Returns the table read from `path` with those four columns added.
read_topolci_file <- function(path, cohort, data_type, modality) {
  loci <- read_tsv(path, show_col_types = FALSE)

  # Per-file labels, one column each
  loci[["Cohort"]] <- cohort
  loci[["Data_Type"]] <- data_type
  loci[["Modality"]] <- modality

  # Single key combining the three labels
  loci[["Cohort_Data_Type_Modality"]] <- paste(cohort, data_type, modality, sep = "_")

  loci
}

# Call read_topolci_file() once per manifest row (Path, Cohort, Data_Type,
# Modality) and row-bind the per-file tables into combined_data.
combined_data <- meta_file |>
  with(list(
    path = Path,
    cohort = Cohort,
    data_type = Data_Type,
    modality = Modality
  )) |>
  pmap_dfr(read_topolci_file)

# Optional export of the stacked table (left disabled)
#fwrite(combined_data, "combined_top_loci_data.tsv")

# Show the first rows
head(combined_data)

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,cluster,cat,ID,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,10396,10397,*,CCCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAG,chr1:10397:CCCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAG:*,ENSG00000158286,ROSMAP_AC_sQTL_chr1:6209539:6209928:clu_403_+:PR:ENSG00000158286,0,0,0,0.02901275,0.4162247,clu_403_,PR,chr1:6209539:6209928:clu_403_+:PR,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
1,10396,10397,*,CCCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAG,chr1:10397:CCCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCTAACCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAG:*,ENSG00000248333,ROSMAP_AC_sQTL_chr1:1637202:1637407:clu_6616_-:PR:ENSG00000248333,0,0,0,0.04186011,-0.6134922,clu_6616_,PR,chr1:1637202:1637407:clu_6616_-:PR,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
1,10432,10433,ACCCTTAACCCCTAACCCCTAACC,ACCCTAACC,chr1:10433:ACCCTAACC:ACCCTTAACCCCTAACCCCTAACC,ENSG00000116213,ROSMAP_AC_sQTL_chr1:3647560:3649930:clu_6810_-:PR:ENSG00000116213,0,0,0,0.08295409,-1.2588737,clu_6810_,PR,chr1:3647560:3649930:clu_6810_-:PR,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
1,10437,10438,*,AAC,chr1:10438:AAC:*,ENSG00000160087,ROSMAP_AC_sQTL_chr1:1256181:1256991:clu_6523_-:UP:ENSG00000160087,0,0,0,0.02530852,0.7696662,clu_6523_,UP,chr1:1256181:1256991:clu_6523_-:UP,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
1,10439,10440,*,CCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAG,chr1:10440:CCCCTAACCCTAACCCTAACCCTAACCCTCGCGGTACCCTCAG:*,ENSG00000078369,ROSMAP_AC_sQTL_chr1:1839238:1839628:clu_6656_-:NE:ENSG00000078369,0,0,0,0.02845004,-0.2279109,clu_6656_,NE,chr1:1839238:1839628:clu_6656_-:NE,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
1,10442,10443,TTAACCCTAACCCTAACCCT,CTAACCCTAACCCTAACCCT,chr1:10443:CTAACCCTAACCCTAACCCT:TTAACCCTAACCCTAACCCT,ENSG00000169972,ROSMAP_AC_sQTL_chr1:1309851:1310633:clu_127_+:PR:ENSG00000169972,0,0,0,0.02746254,-1.3924732,clu_127_,PR,chr1:1309851:1310633:clu_127_+:PR,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC


In [52]:
# Read the variant list (one element per line of the file) and strip every
# double-quote character from the entries.
three_overlap <- gsub(
  '"', '',
  readLines("project/image_QTL/interested_genes/CB_M1/DDX17_CBY1_var.txt")
)

In [53]:
# Display the cleaned variant ID vector.
three_overlap

In [59]:
# Keep the combined top-loci rows whose variant is in three_overlap, then
# tally how many rows each gene contributes.
# NOTE(review): the name says SUN2 but the variant list is read from
# DDX17_CBY1_var.txt — confirm the intended gene.
SUN2_var <- filter(combined_data, variant_ID %in% three_overlap)
count(SUN2_var, gene_ID)

gene_ID,n
<chr>,<int>
ENSG00000100211,5
ENSG00000100221,4
ENSG00000184949,109
ENSG00000198792,23


In [55]:
# Display the rows selected into SUN2_var.
SUN2_var

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,cluster,cat,ID,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
22,38592109,38592110,G,A,chr22:38592110:A:G,ENSG00000100211,ROSMAP_AC_sQTL_chr22:38668132:38670766:clu_143018_+:UP:ENSG00000100211,0,0,0,0.021934321,-0.40321481,clu_143018_,UP,chr22:38668132:38670766:clu_143018_+:UP,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
22,38592109,38592110,G,A,chr22:38592110:A:G,ENSG00000100211,ROSMAP_AC_sQTL_chr22:38670989:38673158:clu_143019_+:UP:ENSG00000100211,0,0,0,0.001049308,-0.25552230,clu_143019_,UP,chr22:38670989:38673158:clu_143019_+:UP,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
22,38592109,38592110,G,A,chr22:38592110:A:G,ENSG00000100221,ROSMAP_AC_sQTL_chr22:38700123:38700965:clu_144722_-:NE:ENSG00000100221,0,0,0,0.002629362,0.04866929,clu_144722_,NE,chr22:38700123:38700965:clu_144722_-:NE,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
22,38597461,38597462,G,A,chr22:38597462:A:G,ENSG00000100211,ROSMAP_AC_sQTL_chr22:38668132:38670766:clu_143018_+:UP:ENSG00000100211,0,0,0,0.001920936,-0.35379653,clu_143018_,UP,chr22:38668132:38670766:clu_143018_+:UP,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
22,38597461,38597462,G,A,chr22:38597462:A:G,ENSG00000100211,ROSMAP_AC_sQTL_chr22:38670989:38673158:clu_143019_+:UP:ENSG00000100211,0,0,0,0.001279521,-0.27017207,clu_143019_,UP,chr22:38670989:38673158:clu_143019_+:UP,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
22,38617824,38617825,C,T,chr22:38617825:T:C,ENSG00000100211,ROSMAP_AC_sQTL_chr22:38668132:38670766:clu_143018_+:UP:ENSG00000100211,0,0,0,0.001075891,-0.31508327,clu_143018_,UP,chr22:38668132:38670766:clu_143018_+:UP,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
22,38717128,38717129,A,G,chr22:38717129:G:A,ENSG00000100221,ROSMAP_AC_sQTL_chr22:38700123:38700965:clu_144722_-:NE:ENSG00000100221,0,0,0,0.002035720,0.04965771,clu_144722_,NE,chr22:38700123:38700965:clu_144722_-:NE,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
22,38717897,38717898,T,C,chr22:38717898:C:T,ENSG00000100221,ROSMAP_AC_sQTL_chr22:38700123:38700965:clu_144722_-:NE:ENSG00000100221,0,0,0,0.002035720,0.04965771,clu_144722_,NE,chr22:38700123:38700965:clu_144722_-:NE,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
22,38754444,38754445,A,G,chr22:38754445:G:A,ENSG00000100221,ROSMAP_AC_sQTL_chr22:38700123:38700965:clu_144722_-:NE:ENSG00000100221,0,0,0,0.002122077,0.04997759,clu_144722_,NE,chr22:38700123:38700965:clu_144722_-:NE,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
22,38578148,38578149,A,C,chr22:38578149:C:A,ENSG00000184949,ROSMAP_DLPFC_sQTL_chr22:38591540:38594026:clu_173584_-:UP:ENSG00000184949,0,0,1,0.009644639,-0.18531622,clu_173584_,UP,chr22:38591540:38594026:clu_173584_-:UP,ROSMAP,sQTL,DLPFC,ROSMAP_sQTL_DLPFC


In [56]:
# Combined top-loci rows whose variant appears in three_overlap.
CBY1_var <- filter(combined_data, variant_ID %in% three_overlap)

In [58]:
# Restrict the selected variants to rows for gene ENSG00000100211.
filter(CBY1_var, gene_ID == "ENSG00000100211")

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,cluster,cat,ID,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
22,38592109,38592110,G,A,chr22:38592110:A:G,ENSG00000100211,ROSMAP_AC_sQTL_chr22:38668132:38670766:clu_143018_+:UP:ENSG00000100211,0,0,0,0.021934321,-0.4032148,clu_143018_,UP,chr22:38668132:38670766:clu_143018_+:UP,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
22,38592109,38592110,G,A,chr22:38592110:A:G,ENSG00000100211,ROSMAP_AC_sQTL_chr22:38670989:38673158:clu_143019_+:UP:ENSG00000100211,0,0,0,0.001049308,-0.2555223,clu_143019_,UP,chr22:38670989:38673158:clu_143019_+:UP,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
22,38597461,38597462,G,A,chr22:38597462:A:G,ENSG00000100211,ROSMAP_AC_sQTL_chr22:38668132:38670766:clu_143018_+:UP:ENSG00000100211,0,0,0,0.001920936,-0.3537965,clu_143018_,UP,chr22:38668132:38670766:clu_143018_+:UP,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
22,38597461,38597462,G,A,chr22:38597462:A:G,ENSG00000100211,ROSMAP_AC_sQTL_chr22:38670989:38673158:clu_143019_+:UP:ENSG00000100211,0,0,0,0.001279521,-0.2701721,clu_143019_,UP,chr22:38670989:38673158:clu_143019_+:UP,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC
22,38617824,38617825,C,T,chr22:38617825:T:C,ENSG00000100211,ROSMAP_AC_sQTL_chr22:38668132:38670766:clu_143018_+:UP:ENSG00000100211,0,0,0,0.001075891,-0.3150833,clu_143018_,UP,chr22:38668132:38670766:clu_143018_+:UP,ROSMAP,sQTL,AC,ROSMAP_sQTL_AC


In [67]:
# Combined top-loci rows whose variant appears in three_overlap.
DDX17_var <- filter(combined_data, variant_ID %in% three_overlap)

In [71]:
# Restrict the selected variants to rows for gene ENSG00000100201
# (the recorded output of this cell has no rows).
filter(DDX17_var, gene_ID == "ENSG00000100201")

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,cluster,cat,ID,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>


## eQTL

In [28]:
# Reload the xQTL top-loci manifest and keep only ROSMAP eQTL entries produced
# by single-context fine-mapping, then display the remaining rows.
meta_file <- fread("project/image_QTL/DNE_cb_explained_by_xQTL/xQTL_top_loci.tsv") |>
  filter(
    Data_Type == "eQTL",
    Cohort == "ROSMAP",
    Method == "single_context_finemapping"
  )
meta_file

Data_Type,Cohort,Modality,Method,Path
<chr>,<chr>,<chr>,<chr>,<chr>
eQTL,ROSMAP,AC,single_context_finemapping,data/analysis_result/single_context/ROSMAP_eQTL/export/summary/context_specific/AC_DeJager_eQTL.exported.toploci.bed.gz
eQTL,ROSMAP,Ast_10_Kellis,single_context_finemapping,data/analysis_result/single_context/ROSMAP_eQTL/export/summary/context_specific/Ast_10_Kellis_eQTL.exported.toploci.bed.gz
eQTL,ROSMAP,Ast_DeJager,single_context_finemapping,data/analysis_result/single_context/ROSMAP_eQTL/export/summary/context_specific/Ast_DeJager_eQTL.exported.toploci.bed.gz
eQTL,ROSMAP,Ast_Kellis,single_context_finemapping,data/analysis_result/single_context/ROSMAP_eQTL/export/summary/context_specific/Ast_Kellis_eQTL.exported.toploci.bed.gz
eQTL,ROSMAP,Ast_mega,single_context_finemapping,data/analysis_result/single_context/ROSMAP_eQTL/export/summary/context_specific/Ast_mega_eQTL.exported.toploci.bed.gz
eQTL,ROSMAP,DLPFC,single_context_finemapping,data/analysis_result/single_context/ROSMAP_eQTL/export/summary/context_specific/DLPFC_DeJager_eQTL.exported.toploci.bed.gz
eQTL,ROSMAP,Exc_DeJager,single_context_finemapping,data/analysis_result/single_context/ROSMAP_eQTL/export/summary/context_specific/Exc_DeJager_eQTL.exported.toploci.bed.gz
eQTL,ROSMAP,Exc_Kellis,single_context_finemapping,data/analysis_result/single_context/ROSMAP_eQTL/export/summary/context_specific/Exc_Kellis_eQTL.exported.toploci.bed.gz
eQTL,ROSMAP,Exc_mega,single_context_finemapping,data/analysis_result/single_context/ROSMAP_eQTL/export/summary/context_specific/Exc_mega_eQTL.exported.toploci.bed.gz
eQTL,ROSMAP,Inh_DeJager,single_context_finemapping,data/analysis_result/single_context/ROSMAP_eQTL/export/summary/context_specific/Inh_DeJager_eQTL.exported.toploci.bed.gz


In [29]:
# Assuming your meta_file dataframe is already loaded
# If not, you'd need to recreate it from your data

# Load one top-loci table and tag every row with the manifest entry it came from.
#
# path                        - file handed to read_tsv()
# cohort, data_type, modality - labels stored in the Cohort, Data_Type and
#                               Modality columns; also joined with "_" into
#                               the Cohort_Data_Type_Modality column
# Returns the table read from `path` with those four columns added.
read_topolci_file <- function(path, cohort, data_type, modality) {
  loci <- read_tsv(path, show_col_types = FALSE)

  # Per-file labels, one column each
  loci[["Cohort"]] <- cohort
  loci[["Data_Type"]] <- data_type
  loci[["Modality"]] <- modality

  # Single key combining the three labels
  loci[["Cohort_Data_Type_Modality"]] <- paste(cohort, data_type, modality, sep = "_")

  loci
}

# Call read_topolci_file() once per manifest row (Path, Cohort, Data_Type,
# Modality) and row-bind the per-file tables into combined_data.
combined_data <- meta_file |>
  with(list(
    path = Path,
    cohort = Cohort,
    data_type = Data_Type,
    modality = Modality
  )) |>
  pmap_dfr(read_topolci_file)

# Optional export of the stacked table (left disabled)
#fwrite(combined_data, "combined_top_loci_data.tsv")

# Show the first rows
head(combined_data)

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
1,99088,99089,*,TTTC,chr1:99089:TTTC:*,ENSG00000196581,AC_DeJager_eQTL_ENSG00000196581,0,0,0,0.14741504,0.3104743,ROSMAP,eQTL,AC,ROSMAP_eQTL_AC
1,600805,600806,C,G,chr1:600806:G:C,ENSG00000198912,AC_DeJager_eQTL_ENSG00000198912,0,0,0,0.23786542,-1.094806,ROSMAP,eQTL,AC,ROSMAP_eQTL_AC
1,602262,602263,*,ATCC,chr1:602263:ATCC:*,ENSG00000187730,AC_DeJager_eQTL_ENSG00000187730,0,0,0,0.03324526,0.3465442,ROSMAP,eQTL,AC,ROSMAP_eQTL_AC
1,602277,602278,*,TCCCTCTCCTTTCTCCTCTCTAGCC,chr1:602278:TCCCTCTCCTTTCTCCTCTCTAGCC:*,ENSG00000187730,AC_DeJager_eQTL_ENSG00000187730,0,0,0,0.0776944,0.3721831,ROSMAP,eQTL,AC,ROSMAP_eQTL_AC
1,610727,610728,T,C,chr1:610728:C:T,ENSG00000127054,AC_DeJager_eQTL_ENSG00000127054,0,0,0,0.04314056,-0.4321317,ROSMAP,eQTL,AC,ROSMAP_eQTL_AC
1,618361,618362,A,G,chr1:618362:G:A,ENSG00000196581,AC_DeJager_eQTL_ENSG00000196581,0,0,0,0.02578836,-0.0710322,ROSMAP,eQTL,AC,ROSMAP_eQTL_AC


In [31]:
# Rows whose event_ID mentions ENSG00000100211, then the subset of those whose
# variant is listed in var_PR_id.
# NOTE(review): var_PR_id is not assigned in any visible cell — confirm it is
# created before this cell runs.
a <- filter(combined_data, str_detect(event_ID, "ENSG00000100211"))
filter(a, variant_ID %in% var_PR_id)

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>


## TWAS

In [74]:
# Load the exported FunGen TWAS summary table (gzipped BED) into AD_twas.
AD_twas <- fread(
  "/home/ubuntu/data/analysis_result/twas/export/summary/FunGen_twas.exported.bed.gz"
)

In [75]:
# Peek at the first rows and report the table's dimensions.
AD_twas |> head()
AD_twas |> dim()

#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
1,923921,923922,ENSG00000187634,0,6480000,AC_DeJager_eQTL,Bellenguez_2022,bayes_l,True,False,0.06358278,4.713139e-10,-0.5516557,0.5811842,eQTL,chr1_16103_2888443
1,923921,923922,ENSG00000187634,0,6480000,AC_DeJager_eQTL,Bellenguez_2022,bayes_r,True,True,0.09006772,8.293727e-14,-0.6079598,0.5432142,eQTL,chr1_16103_2888443
1,923921,923922,ENSG00000187634,0,6480000,AC_DeJager_eQTL,Bellenguez_2022,enet,True,False,0.07363354,1.811244e-11,-0.8264034,0.4085753,eQTL,chr1_16103_2888443
1,923921,923922,ENSG00000187634,0,6480000,AC_DeJager_eQTL,Bellenguez_2022,lasso,True,False,0.07175097,3.341192e-11,-0.7787284,0.4361397,eQTL,chr1_16103_2888443
1,923921,923922,ENSG00000187634,0,6480000,AC_DeJager_eQTL,Bellenguez_2022,mrash,True,False,0.08157779,1.353045e-12,-0.1803822,0.8568525,eQTL,chr1_16103_2888443
1,923921,923922,ENSG00000187634,0,6480000,AC_DeJager_eQTL,Bellenguez_2022,mrmash,True,False,0.0774621,5.199122e-12,-1.4978502,0.1341722,eQTL,chr1_16103_2888443


In [80]:
# LATS1 (ENSG00000131023): TWAS rows with twas_pval below 7e-4
filter(AD_twas, molecular_id == "ENSG00000131023", twas_pval < 7 * 1e-4)


#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
6,149718103,149718104,ENSG00000131023,148560000,151480000,BM_10_MSBB_eQTL,Bellenguez_2022,bayes_l,True,False,0.08935199,4.629106e-07,-3.47854,0.0005041541,eQTL,chr6_148538997_149932268
6,149718103,149718104,ENSG00000131023,148560000,151480000,BM_10_MSBB_eQTL,Bellenguez_2022,bayes_r,True,False,0.08118388,1.630121e-06,-3.476076,0.0005088085,eQTL,chr6_148538997_149932268
6,149718103,149718104,ENSG00000131023,148560000,151480000,BM_10_MSBB_eQTL,Bellenguez_2022,enet,True,False,0.09014878,4.092624e-07,-3.770504,0.0001629181,eQTL,chr6_148538997_149932268
6,149718103,149718104,ENSG00000131023,148560000,151480000,BM_10_MSBB_eQTL,Bellenguez_2022,lasso,True,False,0.07163579,7.044287e-06,-3.685689,0.0002280848,eQTL,chr6_148538997_149932268
6,149718103,149718104,ENSG00000131023,148560000,151480000,BM_10_MSBB_eQTL,Bellenguez_2022,mrash,True,False,0.09878373,1.072101e-07,-3.495838,0.000472575,eQTL,chr6_148538997_149932268
6,149718103,149718104,ENSG00000131023,148560000,151480000,BM_22_MSBB_eQTL,Wightman_Excluding23andMe_2021,lasso,True,False,0.11429018,3.29728e-08,-3.409129,0.0006517067,eQTL,chr6_148538997_149932268
6,149718103,149718104,ENSG00000131023,148560000,151480000,DLPFC_DeJager_eQTL,Bellenguez_2022,bayes_l,True,False,0.04338313,3.923306e-09,-3.758877,0.000170678,eQTL,chr6_148538997_149932268
6,149718103,149718104,ENSG00000131023,148560000,151480000,DLPFC_DeJager_eQTL,Bellenguez_2022,bayes_r,True,False,0.04495337,2.029381e-09,-3.734796,0.0001878675,eQTL,chr6_148538997_149932268
6,149718103,149718104,ENSG00000131023,148560000,151480000,DLPFC_DeJager_eQTL,Bellenguez_2022,mrash,True,False,0.04608768,1.26014e-09,-3.801566,0.0001437845,eQTL,chr6_148538997_149932268
6,149718103,149718104,ENSG00000131023,148560000,151480000,Inh_mega_eQTL,Bellenguez_2022,enet,True,False,0.13022984,2.820083e-24,-3.452441,0.0005555381,eQTL,chr6_148538997_149932268


In [81]:
# ENSG00000131023: TWAS rows with twas_pval below 1.4e-3 whose context
# string contains "sQTL"
filter(
  AD_twas,
  molecular_id == "ENSG00000131023",
  twas_pval < 1.4 * 1e-3,
  str_detect(context, "sQTL")
)

#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
6,149718103,149718104,ENSG00000131023,148560000,151480000,ROSMAP_AC_sQTL_chr6:149662238:149676259:clu_52799_-:PR:ENSG00000131023,Bellenguez_2022,mrash,True,True,0.1328771,1.3182489999999999e-20,3.396173,0.0006833519,productive_sQTL,chr6_148538997_149932268
6,149718103,149718104,ENSG00000131023,148560000,151480000,ROSMAP_AC_sQTL_chr6:149662238:149676259:clu_52799_-:PR:ENSG00000131023,Bellenguez_2022,susie,True,False,0.1308235,2.7037409999999998e-20,3.353792,0.0007971229,productive_sQTL,chr6_148538997_149932268


In [75]:
# TNIP1 (ENSG00000145901): TWAS rows with twas_pval below 4.6e-5.
# This cell was previously labelled "SUN2", but the later cell querying the
# same Ensembl ID labels it TNIP1 and the hits are on chr5.
AD_twas |> filter(molecular_id == "ENSG00000145901" & twas_pval < 4.6*1e-5)

#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
5,151093575,151093576,ENSG00000145901,147240000,154960000,DLPFC_DeJager_eQTL,Wightman_Full_2021,enet,True,False,0.008174572,0.01131803,4.552765,5.294536e-06,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,DLPFC_DeJager_eQTL,Wightman_Full_2021,lasso,True,False,0.017195393,0.0002316618,4.359841,1.301569e-05,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_2022,bayes_r,True,False,0.020103673,0.003633386,4.797897,1.603403e-06,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_2022,enet,True,False,0.025244056,0.001101203,4.342834,1.406566e-05,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_2022,lasso,True,False,0.036256777,8.784071e-05,4.51003,6.481846e-06,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_2022,mrash,True,False,0.007085456,0.08526346,4.777922,1.771162e-06,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_2022,susie,True,False,0.018211856,0.005660959,4.796213,1.616935e-06,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Wightman_Excluding23andMe_2021,bayes_r,True,False,0.020103673,0.003633386,5.70037,1.195476e-08,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Wightman_Excluding23andMe_2021,enet,True,False,0.025244056,0.001101203,5.967643,2.407055e-09,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Wightman_Excluding23andMe_2021,lasso,True,False,0.036256777,8.784071e-05,6.016836,1.778594e-09,eQTL,chr5_149895776_152257891


In [None]:
## 

In [27]:
# TNIP1-aging2-ad2-ad
# Rows of `a` for ENSG00000145901 whose context string contains "Exc".
# NOTE(review): `a` must hold a TWAS-style table here (the recorded output has
# molecular_id/context columns), unlike the `a` assigned in the visible cells —
# confirm where it is built before re-running.
filter(a, molecular_id == "ENSG00000145901", str_detect(context, "Exc"))

#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_2022,bayes_r,True,False,0.020103673,0.003633386,4.797897,1.603403e-06,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_2022,mrash,True,False,0.007085456,0.08526346,4.777922,1.771162e-06,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_2022,susie,True,False,0.018211856,0.005660959,4.796213,1.616935e-06,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Wightman_Excluding23andMe_2021,bayes_r,True,False,0.020103673,0.003633386,5.70037,1.195476e-08,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Wightman_Excluding23andMe_2021,enet,True,False,0.025244056,0.001101203,5.967643,2.407055e-09,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Wightman_Excluding23andMe_2021,lasso,True,False,0.036256777,8.784071e-05,6.016836,1.778594e-09,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Wightman_Excluding23andMe_2021,mrash,True,False,0.007085456,0.08526346,5.712165,1.115479e-08,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Wightman_Excluding23andMe_2021,susie,True,False,0.018211856,0.005660959,5.753083,8.76305e-09,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Wightman_ExcludingUKBand23andME_2021,enet,True,False,0.025244056,0.001101203,4.785963,1.701694e-06,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Wightman_ExcludingUKBand23andME_2021,lasso,True,False,0.036256777,8.784071e-05,4.732478,2.217956e-06,eQTL,chr5_149895776_152257891


In [62]:
# DDX17 (ENSG00000100201): TWAS rows with twas_pval below 4e-6
filter(AD_twas, molecular_id == "ENSG00000100201", twas_pval < 4 * 1e-6)

#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
22,38507658,38507659,ENSG00000100201,37483438,41680000,Ast_mega_eQTL,Wightman_Full_2021,enet,True,False,0.01906245,0.0001000798,-4.712295,2.449422e-06,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,Oli_DeJager_eQTL,Wightman_Full_2021,lasso,True,False,0.017625,0.006499615,-4.648857,3.337798e-06,eQTL,chr22_37320379_39179317


In [34]:
# DDX17 (ENSG00000100201): TWAS rows with twas_pval below 1e-4
filter(AD_twas, molecular_id == "ENSG00000100201", twas_pval < 1e-4)

#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
22,38507658,38507659,ENSG00000100201,37483438,41680000,AC_DeJager_eQTL,Wightman_Excluding23andMe_2021,mrmash,True,True,0.02021946,0.0005146089,-4.05071,5.106245e-05,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,AC_DeJager_eQTL,Wightman_Full_2021,mrmash,True,True,0.02021946,0.0005146089,-4.057508,4.959909e-05,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,AC_DeJager_eQTL,Wightman_Full_2021,susie,True,False,4.720863e-07,0.9866789,-3.906483,9.364906e-05,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,Ast_DeJager_eQTL,Wightman_Excluding23andMe_2021,mrmash,True,False,0.02870024,0.000496548,-4.05557,5.001209e-05,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,Ast_DeJager_eQTL,Wightman_Full_2021,mrmash,True,False,0.02870024,0.000496548,-4.05598,4.992448e-05,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,Ast_DeJager_eQTL,Wightman_Full_2021,susie,True,False,0.02343935,0.00167198,-4.180768,2.905261e-05,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,Ast_mega_eQTL,Wightman_Excluding23andMe_2021,enet,True,False,0.01906245,0.0001000798,-4.567469,4.936495e-06,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,Ast_mega_eQTL,Wightman_Full_2021,enet,True,False,0.01906245,0.0001000798,-4.712295,2.449422e-06,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,Ast_mega_eQTL,Wightman_Full_2021,mrash,True,False,0.008639478,0.006625041,-4.252741,2.111694e-05,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,Ast_mega_eQTL,Wightman_Full_2021,susie,True,False,0.01216314,0.001576276,-4.131411,3.605433e-05,eQTL,chr22_37320379_39179317


In [40]:
# FAM227A (ENSG00000184949): TWAS rows with twas_pval below 3e-5
filter(AD_twas, molecular_id == "ENSG00000184949", twas_pval < 3 * 1e-5)


#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
22,38656627,38656628,ENSG00000184949,37520000,41680000,BM_10_MSBB_eQTL,Wightman_Full_2021,enet,True,False,0.196906546,1.199699e-14,-4.323725,1.534164e-05,eQTL,chr22_37320379_39179317
22,38656627,38656628,ENSG00000184949,37520000,41680000,BM_10_MSBB_eQTL,Wightman_Full_2021,lasso,True,False,0.166017335,2.207526e-12,-4.462665,8.094639e-06,eQTL,chr22_37320379_39179317
22,38656627,38656628,ENSG00000184949,37520000,41680000,BM_10_MSBB_eQTL,Wightman_Full_2021,mrmash,True,False,0.180890664,1.832901e-13,-4.548809,5.395041e-06,eQTL,chr22_37320379_39179317
22,38656627,38656628,ENSG00000184949,37520000,41680000,BM_22_MSBB_eQTL,Wightman_Full_2021,enet,True,False,0.119949091,1.437276e-08,-4.221782,2.423785e-05,eQTL,chr22_37320379_39179317
22,38656627,38656628,ENSG00000184949,37520000,41680000,BM_22_MSBB_eQTL,Wightman_Full_2021,mrmash,True,False,0.090534965,1.03202e-06,-4.558288,5.157217e-06,eQTL,chr22_37320379_39179317
22,38656627,38656628,ENSG00000184949,37520000,41680000,BM_36_MSBB_eQTL,Wightman_Full_2021,mrmash,True,False,0.129299141,1.99461e-08,-4.510455,6.468877e-06,eQTL,chr22_37320379_39179317
22,38656627,38656628,ENSG00000184949,37520000,41680000,BM_44_MSBB_eQTL,Wightman_Full_2021,enet,True,False,0.153256334,8.3554e-11,-4.293006,1.762702e-05,eQTL,chr22_37320379_39179317
22,38656627,38656628,ENSG00000184949,37520000,41680000,BM_44_MSBB_eQTL,Wightman_Full_2021,lasso,True,False,0.116503656,2.098436e-08,-4.413747,1.015965e-05,eQTL,chr22_37320379_39179317
22,38656627,38656628,ENSG00000184949,37520000,41680000,BM_44_MSBB_eQTL,Wightman_Full_2021,mrmash,True,False,0.197048622,8.723368e-14,-4.529075,5.924233e-06,eQTL,chr22_37320379_39179317
22,38656627,38656628,ENSG00000184949,37520000,41680000,DLPFC_DeJager_eQTL,Wightman_Full_2021,enet,True,False,0.239485757,1.891433e-48,-4.206179,2.59725e-05,eQTL,chr22_37320379_39179317


In [84]:
# CBY1 (ENSG00000100211): TWAS rows with twas_pval below 7e-4
filter(AD_twas, molecular_id == "ENSG00000100211", twas_pval < 7 * 1e-4)


#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
22,38656638,38656639,ENSG00000100211,37520000,41680000,BM_22_MSBB_eQTL,Wightman_Full_2021,mrash,TRUE,FALSE,0.0315402759,4.524751e-03,3.673112,2.396147e-04,eQTL,chr22_37320379_39179317
22,38656638,38656639,ENSG00000100211,37520000,41680000,BM_22_MSBB_eQTL,Wightman_Full_2021,mrmash,TRUE,FALSE,0.0129941402,6.972603e-02,3.704533,2.117804e-04,eQTL,chr22_37320379_39179317
22,38656638,38656639,ENSG00000100211,37520000,41680000,BM_22_MSBB_eQTL,Wightman_Full_2021,mvsusie,TRUE,FALSE,0.0119059635,8.263344e-02,3.758773,1.707484e-04,eQTL,chr22_37320379_39179317
22,38656638,38656639,ENSG00000100211,37520000,41680000,BM_22_MSBB_eQTL,Wightman_Full_2021,susie,TRUE,FALSE,0.0120219885,8.114271e-02,3.759069,1.705470e-04,eQTL,chr22_37320379_39179317
22,38656638,38656639,ENSG00000100211,37520000,41680000,BM_44_MSBB_eQTL,Wightman_Full_2021,mvsusie,TRUE,FALSE,0.0007966452,6.530869e-01,3.699800,2.157691e-04,eQTL,chr22_37320379_39179317
22,38656638,38656639,ENSG00000100211,37520000,41680000,DLPFC_DeJager_eQTL,Bellenguez_2022,bayes_l,TRUE,FALSE,0.0191041448,1.033576e-04,4.725888,2.291120e-06,eQTL,chr22_37320379_39179317
22,38656638,38656639,ENSG00000100211,37520000,41680000,DLPFC_DeJager_eQTL,Bellenguez_2022,bayes_r,TRUE,FALSE,0.0290902101,1.561064e-06,4.600902,4.206652e-06,eQTL,chr22_37320379_39179317
22,38656638,38656639,ENSG00000100211,37520000,41680000,DLPFC_DeJager_eQTL,Bellenguez_2022,lasso,TRUE,FALSE,0.0276963714,2.798260e-06,3.638175,2.745773e-04,eQTL,chr22_37320379_39179317
22,38656638,38656639,ENSG00000100211,37520000,41680000,DLPFC_DeJager_eQTL,Bellenguez_2022,mrash,TRUE,FALSE,0.0224720380,2.502999e-05,4.688983,2.745667e-06,eQTL,chr22_37320379_39179317
22,38656638,38656639,ENSG00000100211,37520000,41680000,DLPFC_DeJager_eQTL,Bellenguez_2022,mrmash,TRUE,FALSE,0.0372376664,5.156683e-08,4.119814,3.791790e-05,eQTL,chr22_37320379_39179317


In [66]:
# TOMM22 (ENSG00000100216): TWAS rows with twas_pval below 6.7e-5
filter(AD_twas, molecular_id == "ENSG00000100216", twas_pval < 6.7 * 1e-5)

#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
22,38681955,38681956,ENSG00000100216,37520000,41680000,AC_DeJager_eQTL,Wightman_Full_2021,enet,True,False,0.016992134,0.001466612,4.643109,3.432051e-06,eQTL,chr22_37320379_39179317
22,38681955,38681956,ENSG00000100216,37520000,41680000,AC_DeJager_eQTL,Wightman_Full_2021,lasso,True,False,0.021240453,0.000370056,4.419881,9.875515e-06,eQTL,chr22_37320379_39179317
22,38681955,38681956,ENSG00000100216,37520000,41680000,BM_10_MSBB_eQTL,Wightman_Full_2021,bayes_l,True,False,0.213693808,6.519005e-16,4.02265,5.754687e-05,eQTL,chr22_37320379_39179317
22,38681955,38681956,ENSG00000100216,37520000,41680000,BM_10_MSBB_eQTL,Wightman_Full_2021,bayes_r,True,False,0.222118119,1.478552e-16,4.197271,2.701509e-05,eQTL,chr22_37320379_39179317
22,38681955,38681956,ENSG00000100216,37520000,41680000,BM_10_MSBB_eQTL,Wightman_Full_2021,enet,True,False,0.206279849,2.376269e-15,4.16611,3.098413e-05,eQTL,chr22_37320379_39179317
22,38681955,38681956,ENSG00000100216,37520000,41680000,BM_10_MSBB_eQTL,Wightman_Full_2021,lasso,True,False,0.206882837,2.139906e-15,4.203544,2.627682e-05,eQTL,chr22_37320379_39179317
22,38681955,38681956,ENSG00000100216,37520000,41680000,BM_10_MSBB_eQTL,Wightman_Full_2021,mrash,True,False,0.21570425,4.581456e-16,4.133889,3.566759e-05,eQTL,chr22_37320379_39179317
22,38681955,38681956,ENSG00000100216,37520000,41680000,BM_10_MSBB_eQTL,Wightman_Full_2021,mrmash,True,False,0.161261446,4.850896e-12,4.003724,6.235318e-05,eQTL,chr22_37320379_39179317
22,38681955,38681956,ENSG00000100216,37520000,41680000,BM_10_MSBB_eQTL,Wightman_Full_2021,susie,True,True,0.257355837,2.516021e-19,4.011288,6.03885e-05,eQTL,chr22_37320379_39179317
22,38681955,38681956,ENSG00000100216,37520000,41680000,BM_22_MSBB_eQTL,Wightman_Full_2021,mrmash,True,False,0.051340825,0.0002717636,3.993221,6.518172e-05,eQTL,chr22_37320379_39179317


In [89]:
# JOSD1 (ENSG00000100221): TWAS rows with twas_pval below 7 * 1e-5
filter(AD_twas, molecular_id == "ENSG00000100221", twas_pval < 7 * 1e-5)


#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
22,38701554,38701555,ENSG00000100221,37520000,41680000,PCC_DeJager_eQTL,Wightman_Full_2021,enet,True,False,0.001722245,0.3846284,4.439344,9.023357e-06,eQTL,chr22_37320379_39179317
22,38701554,38701555,ENSG00000100221,37520000,41680000,PCC_DeJager_eQTL,Wightman_Full_2021,lasso,True,False,0.001522126,0.4137634,4.41776,9.972902e-06,eQTL,chr22_37320379_39179317


In [64]:
# SUN2 (ENSG00000100242): TWAS rows with twas_pval below 5.8 * 1e-5
filter(AD_twas, molecular_id == "ENSG00000100242", twas_pval < 5.8 * 1e-5)



#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
22,38794141,38794142,ENSG00000100242,37520000,41680000,BM_10_MSBB_eQTL,Wightman_Full_2021,enet,True,False,0.03337578,0.002398755,-4.154224,3.263931e-05,eQTL,chr22_37320379_39179317
22,38794141,38794142,ENSG00000100242,37520000,41680000,BM_10_MSBB_eQTL,Wightman_Full_2021,susie,True,True,0.055955204,7.687698e-05,-4.184503,2.857909e-05,eQTL,chr22_37320379_39179317
22,38794141,38794142,ENSG00000100242,37520000,41680000,Mic_DeJager_eQTL,Wightman_Full_2021,mrash,True,False,0.007813765,0.07067733,-4.08822,4.346967e-05,eQTL,chr22_37320379_39179317
22,38794141,38794142,ENSG00000100242,37520000,41680000,Oli_DeJager_eQTL,Bellenguez_2022,bayes_l,True,False,0.039275617,4.405227e-05,4.089962,4.314436e-05,eQTL,chr22_37320379_39179317


In [34]:
# MAPK3 (ENSG00000102882): every row of `a` for this gene, no p-value cutoff
# NOTE(review): `a` must already be defined by an earlier cell; the recorded
# output has TWAS columns (twas_z, twas_pval) - confirm which table it holds
filter(a, molecular_id == "ENSG00000102882")


#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
16,30123504,30123505,ENSG00000102882,28360000,34000000,AC_DeJager_eQTL,Bellenguez_2022,bayes_l,True,False,0.013291301,0.004940341,7.395793,1.40567e-13,eQTL,chr16_29685831_46381513
16,30123504,30123505,ENSG00000102882,28360000,34000000,AC_DeJager_eQTL,Bellenguez_2022,bayes_r,True,False,0.034831153,4.752336e-06,5.387521,7.143597e-08,eQTL,chr16_29685831_46381513
16,30123504,30123505,ENSG00000102882,28360000,34000000,AC_DeJager_eQTL,Bellenguez_2022,enet,True,False,0.020257362,0.0005083416,6.104936,1.028417e-09,eQTL,chr16_29685831_46381513
16,30123504,30123505,ENSG00000102882,28360000,34000000,AC_DeJager_eQTL,Bellenguez_2022,lasso,True,False,0.033669344,6.887558e-06,5.661128,1.503815e-08,eQTL,chr16_29685831_46381513
16,30123504,30123505,ENSG00000102882,28360000,34000000,AC_DeJager_eQTL,Bellenguez_2022,mrash,True,False,0.019179082,0.0007206259,7.449552,9.365788e-14,eQTL,chr16_29685831_46381513
16,30123504,30123505,ENSG00000102882,28360000,34000000,AC_DeJager_eQTL,Bellenguez_2022,mvsusie,True,False,0.043630143,2.85908e-07,4.974667,6.535998e-07,eQTL,chr16_29685831_46381513
16,30123504,30123505,ENSG00000102882,28360000,34000000,AC_DeJager_eQTL,Bellenguez_2022,susie,True,True,0.054387397,9.110961e-09,6.157798,7.376351e-10,eQTL,chr16_29685831_46381513
16,30123504,30123505,ENSG00000102882,28360000,34000000,AC_DeJager_eQTL,Bellenguez_EADB_2022,bayes_l,True,False,0.013291301,0.004940341,4.856948,1.192089e-06,eQTL,chr16_29685831_46381513
16,30123504,30123505,ENSG00000102882,28360000,34000000,AC_DeJager_eQTL,Bellenguez_EADB_2022,mrash,True,False,0.019179082,0.0007206259,4.883777,1.040726e-06,eQTL,chr16_29685831_46381513
16,30123504,30123505,ENSG00000102882,28360000,34000000,Ast_DeJager_eQTL,Bellenguez_2022,mrash,True,False,0.00732921,0.08005119,4.795898,1.619476e-06,eQTL,chr16_29685831_46381513


In [35]:
# ATG13-dne-aging2 (ENSG00000175224): every row of `a` for this gene
# NOTE(review): `a` must already be defined by an earlier cell; the recorded
# output has TWAS columns (twas_z, twas_pval) - confirm which table it holds
filter(a, molecular_id == "ENSG00000175224")


#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
11,46617525,46617526,ENSG00000175224,45617526,48440000,ROSMAP_PCC_sQTL_chr11:46665519:46668498:clu_74957_+:UP:ENSG00000175224,Kunkle_Stage1_2019,enet,True,False,0.015332353,0.004952496,-5.591181,2.255309e-08,unproductive_sQTL,chr11_44270671_46916060
11,46617525,46617526,ENSG00000175224,45617526,48440000,ROSMAP_PCC_sQTL_chr11:46665519:46668498:clu_74957_+:UP:ENSG00000175224,Kunkle_Stage1_2019,susie,True,False,-0.0008542556,0.43234941,-5.514842,3.490938e-08,unproductive_sQTL,chr11_44270671_46916060
11,46617525,46617526,ENSG00000175224,45617526,48440000,ROSMAP_PCC_sQTL_chr11:46665519:46668498:clu_74957_+:UP:ENSG00000175224,Wightman_ExcludingUKBand23andME_2021,enet,True,False,0.015332353,0.004952496,-6.329562,2.458577e-10,unproductive_sQTL,chr11_44270671_46916060
11,46617525,46617526,ENSG00000175224,45617526,48440000,ROSMAP_PCC_sQTL_chr11:46665519:46668498:clu_74957_+:UP:ENSG00000175224,Wightman_ExcludingUKBand23andME_2021,lasso,True,True,0.0161209692,0.004065225,-4.888531,1.01591e-06,unproductive_sQTL,chr11_44270671_46916060
11,46617525,46617526,ENSG00000175224,45617526,48440000,ROSMAP_PCC_sQTL_chr11:46665519:46668498:clu_74957_+:UP:ENSG00000175224,Wightman_ExcludingUKBand23andME_2021,mrash,True,False,0.0089571781,0.025126112,-4.762619,1.91096e-06,unproductive_sQTL,chr11_44270671_46916060
11,46617525,46617526,ENSG00000175224,45617526,48440000,ROSMAP_PCC_sQTL_chr11:46665519:46668498:clu_74957_+:UP:ENSG00000175224,Wightman_ExcludingUKBand23andME_2021,susie,True,False,-0.0008542556,0.43234941,-6.445304,1.153687e-10,unproductive_sQTL,chr11_44270671_46916060


In [36]:
# KANSL1-PD-aging3-ad (ENSG00000120071): every row of `a` for this gene
# NOTE(review): `a` must already be defined by an earlier cell; the recorded
# output has TWAS columns (twas_z, twas_pval) - confirm which table it holds
filter(a, molecular_id == "ENSG00000120071")



#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
17,46225387,46225388,ENSG00000120071,42560000,47225389,AC_DeJager_eQTL,Bellenguez_GRACE_2022,bayes_l,TRUE,FALSE,0.5398876,1.060405e-101,-5.515760,3.472765e-08,eQTL,chr17_45383525_50162864
17,46225387,46225388,ENSG00000120071,42560000,47225389,AC_DeJager_eQTL,Bellenguez_GRACE_2022,bayes_r,TRUE,FALSE,0.6233801,1.984079e-127,-5.217325,1.815257e-07,eQTL,chr17_45383525_50162864
17,46225387,46225388,ENSG00000120071,42560000,47225389,AC_DeJager_eQTL,Bellenguez_GRACE_2022,enet,TRUE,FALSE,0.5976925,5.945279e-119,-5.599983,2.143726e-08,eQTL,chr17_45383525_50162864
17,46225387,46225388,ENSG00000120071,42560000,47225389,AC_DeJager_eQTL,Bellenguez_GRACE_2022,lasso,TRUE,FALSE,0.6008172,5.920882e-120,-5.471668,4.458202e-08,eQTL,chr17_45383525_50162864
17,46225387,46225388,ENSG00000120071,42560000,47225389,AC_DeJager_eQTL,Bellenguez_GRACE_2022,mrash,TRUE,TRUE,0.6372958,2.891804e-132,-5.201417,1.977744e-07,eQTL,chr17_45383525_50162864
17,46225387,46225388,ENSG00000120071,42560000,47225389,AC_DeJager_eQTL,Bellenguez_GRACE_2022,mrmash,TRUE,FALSE,0.4371405,8.667837e-76,-5.000131,5.729135e-07,eQTL,chr17_45383525_50162864
17,46225387,46225388,ENSG00000120071,42560000,47225389,AC_DeJager_eQTL,Bellenguez_GRACE_2022,mvsusie,TRUE,FALSE,0.6350549,1.788000e-131,-5.074242,3.890441e-07,eQTL,chr17_45383525_50162864
17,46225387,46225388,ENSG00000120071,42560000,47225389,AC_DeJager_eQTL,Bellenguez_GRACE_2022,susie,TRUE,FALSE,0.6342944,3.309807e-131,-5.030917,4.881407e-07,eQTL,chr17_45383525_50162864
17,46225387,46225388,ENSG00000120071,42560000,47225389,Ast_DeJager_eQTL,Bellenguez_GRACE_2022,bayes_l,TRUE,FALSE,0.4893591,7.723771e-63,-5.302428,1.142725e-07,eQTL,chr17_45383525_50162864
17,46225387,46225388,ENSG00000120071,42560000,47225389,Ast_DeJager_eQTL,Bellenguez_GRACE_2022,bayes_r,TRUE,FALSE,0.5245072,2.602078e-69,-5.102209,3.357123e-07,eQTL,chr17_45383525_50162864


In [37]:
# MAPT (ENSG00000186868): every row of `a` for this gene
# NOTE(review): `a` must already be defined by an earlier cell; the recorded
# output has TWAS columns (twas_z, twas_pval) - confirm which table it holds
filter(a, molecular_id == "ENSG00000186868")



#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
17,45894525,45894526,ENSG00000186868,42560000,47028334,AC_DeJager_eQTL,Bellenguez_2022,mrash,TRUE,FALSE,0.01301435,5.414767e-03,-5.029104,4.927764e-07,eQTL,chr17_45383525_50162864
17,45894525,45894526,ENSG00000186868,42560000,47028334,AC_DeJager_eQTL,Bellenguez_2022,mrmash,TRUE,FALSE,0.01311749,5.232898e-03,-5.262979,1.417396e-07,eQTL,chr17_45383525_50162864
17,45894525,45894526,ENSG00000186868,42560000,47028334,AC_DeJager_eQTL,Bellenguez_2022,mvsusie,TRUE,FALSE,0.00858193,2.407107e-02,-5.489874,4.022211e-08,eQTL,chr17_45383525_50162864
17,45894525,45894526,ENSG00000186868,42560000,47028334,AC_DeJager_eQTL,Bellenguez_GRACE_2022,mrash,TRUE,FALSE,0.01301435,5.414767e-03,-5.030530,4.891261e-07,eQTL,chr17_45383525_50162864
17,45894525,45894526,ENSG00000186868,42560000,47028334,AC_DeJager_eQTL,Bellenguez_GRACE_2022,mrmash,TRUE,FALSE,0.01311749,5.232898e-03,-5.562121,2.665148e-08,eQTL,chr17_45383525_50162864
17,45894525,45894526,ENSG00000186868,42560000,47028334,AC_DeJager_eQTL,Bellenguez_GRACE_2022,mvsusie,TRUE,FALSE,0.00858193,2.407107e-02,-5.608505,2.040822e-08,eQTL,chr17_45383525_50162864
17,45894525,45894526,ENSG00000186868,42560000,47028334,AC_DeJager_eQTL,Wightman_ExcludingUKBand23andME_2021,mvsusie,TRUE,FALSE,0.00858193,2.407107e-02,-4.962521,6.958390e-07,eQTL,chr17_45383525_50162864
17,45894525,45894526,ENSG00000186868,42560000,47028334,AC_DeJager_eQTL,Wightman_Full_2021,mvsusie,TRUE,FALSE,0.00858193,2.407107e-02,-4.917322,8.773618e-07,eQTL,chr17_45383525_50162864
17,45894525,45894526,ENSG00000186868,42560000,47028334,Ast_DeJager_eQTL,Bellenguez_2022,bayes_l,TRUE,FALSE,0.32640905,1.130405e-37,-4.996739,5.830791e-07,eQTL,chr17_45383525_50162864
17,45894525,45894526,ENSG00000186868,42560000,47028334,Ast_DeJager_eQTL,Bellenguez_2022,bayes_r,TRUE,FALSE,0.32380350,2.538261e-37,-5.021289,5.132584e-07,eQTL,chr17_45383525_50162864


In [79]:
# KATNA1 (ENSG00000186625): TWAS rows with twas_pval below 8.8 * 1e-5
filter(AD_twas, molecular_id == "ENSG00000186625", twas_pval < 8.8 * 1e-5) #567


#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>


In [82]:
# LATS1 - pleiotropic - Aging2 - AD (ENSG00000131023):
# TWAS rows with twas_pval below 3.8 * 1e-6
filter(AD_twas, molecular_id == "ENSG00000131023", twas_pval < 3.8 * 1e-6)


#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>


In [6]:
# Keep the TWAS rows with twas_pval < 0.05, then report the subset's
# dimensions; later cells query `sig`
sig <- filter(AD_twas, twas_pval < 0.05)
dim(sig)

In [24]:
# DDX17 (ENSG00000100201): rows of `sig` for this gene, all contexts
# Optional extra condition to restrict contexts: str_detect(context, "Exc")
filter(sig, molecular_id == "ENSG00000100201")

#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
22,38507658,38507659,ENSG00000100201,37483438,41680000,AC_DeJager_eQTL,Bellenguez_2022,bayes_l,TRUE,FALSE,1.436703e-03,0.3568400243,-2.003446,4.512945e-02,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,AC_DeJager_eQTL,Bellenguez_2022,bayes_r,TRUE,FALSE,6.463062e-05,0.8451109297,-2.246207,2.469078e-02,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,AC_DeJager_eQTL,Bellenguez_2022,enet,TRUE,FALSE,3.824801e-03,0.1325076971,-2.735684,6.225077e-03,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,AC_DeJager_eQTL,Bellenguez_2022,lasso,TRUE,FALSE,1.119856e-04,0.7970553129,-2.877139,4.012982e-03,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,AC_DeJager_eQTL,Bellenguez_2022,mrash,TRUE,FALSE,1.002848e-03,0.4414620197,-2.048669,4.049444e-02,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,AC_DeJager_eQTL,Bellenguez_2022,mrmash,TRUE,TRUE,2.021946e-02,0.0005146089,-3.225360,1.258142e-03,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,AC_DeJager_eQTL,Bellenguez_2022,mvsusie,TRUE,FALSE,1.266767e-02,0.0060745647,-3.142644,1.674295e-03,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,AC_DeJager_eQTL,Bellenguez_2022,susie,TRUE,FALSE,4.720863e-07,0.9866788830,-2.917403,3.529597e-03,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,AC_DeJager_eQTL,Jansen_2021,lasso,TRUE,FALSE,1.119856e-04,0.7970553129,-1.974048,4.837628e-02,eQTL,chr22_37320379_39179317
22,38507658,38507659,ENSG00000100201,37483438,41680000,AC_DeJager_eQTL,Jansen_2021,mrmash,TRUE,TRUE,2.021946e-02,0.0005146089,-2.960595,3.070457e-03,eQTL,chr22_37320379_39179317


In [22]:
# TNIP1 (ENSG00000145901): rows of `sig` whose context contains "Exc"
filter(sig, molecular_id == "ENSG00000145901", str_detect(context, "Exc"))

#chr,start,end,molecular_id,TADB_start,TADB_end,context,gwas_study,method,is_imputable,is_selected_method,rsq_cv,pval_cv,twas_z,twas_pval,type,block
<int>,<int>,<int>,<chr>,<int>,<int>,<chr>,<chr>,<chr>,<lgl>,<lgl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_2022,bayes_l,True,False,0.001680124,0.4026625,2.378591,0.01737894,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_2022,bayes_r,True,False,0.020103673,0.003633386,4.797897,1.603403e-06,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_2022,enet,True,False,0.025244056,0.001101203,4.342834,1.406566e-05,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_2022,lasso,True,False,0.036256777,8.784071e-05,4.51003,6.481846e-06,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_2022,mrash,True,False,0.007085456,0.08526346,4.777922,1.771162e-06,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_2022,susie,True,False,0.018211856,0.005660959,4.796213,1.616935e-06,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_EADB_2022,bayes_r,True,False,0.020103673,0.003633386,2.898844,0.003745416,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_EADB_2022,enet,True,False,0.025244056,0.001101203,2.948414,0.003194089,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_EADB_2022,lasso,True,False,0.036256777,8.784071e-05,3.037821,0.002382956,eQTL,chr5_149895776_152257891
5,151093575,151093576,ENSG00000145901,147240000,154960000,Exc_DeJager_eQTL,Bellenguez_EADB_2022,mrash,True,False,0.007085456,0.08526346,3.058359,0.002225529,eQTL,chr5_149895776_152257891


## Multi-context

In [108]:
# Load the table of top-loci files and keep only the rows whose Method is
# "multi_context_finemapping", then report how many remain
meta_file <- fread("project/image_QTL/DNE_cb_explained_by_xQTL/xQTL_top_loci.tsv") |>
  filter(Method == "multi_context_finemapping")
dim(meta_file)

In [109]:
# Assuming your meta_file dataframe is already loaded
# If not, you'd need to recreate it from your data

# Read one top-loci table and tag every row with where it came from.
#
# Arguments:
#   path      - file to read with read_tsv() (tab-delimited text)
#   cohort    - value stored in the new `Cohort` column
#   data_type - value stored in the new `Data_Type` column
#   modality  - value stored in the new `Modality` column
#
# Returns the parsed table with four added columns: Cohort, Data_Type,
# Modality, and Cohort_Data_Type_Modality (the three labels joined by "_").
read_topolci_file <- function(path, cohort, data_type, modality) {
  top_loci <- read_tsv(path, show_col_types = FALSE)

  # Provenance labels, in the order the columns are appended
  labels <- list(
    Cohort = cohort,
    Data_Type = data_type,
    Modality = modality,
    Cohort_Data_Type_Modality = paste(cohort, data_type, modality, sep = "_")
  )
  for (label in names(labels)) {
    top_loci[[label]] <- labels[[label]]
  }

  top_loci
}

# Read every file listed in meta_file and stack the results into one table.
# Each row of meta_file supplies one call to read_topolci_file(); pmap()
# matches the list names below to that function's argument names.
# pmap() + list_rbind() replaces pmap_dfr(), which purrr superseded in 1.0.0.
combined_data <- pmap(
  list(
    path = meta_file$Path,
    cohort = meta_file$Cohort,
    data_type = meta_file$Data_Type,
    modality = meta_file$Modality
  ),
  read_topolci_file
) |>
  list_rbind()

# Save the combined dataset if needed
#fwrite(combined_data, "combined_top_loci_data.tsv")

# Preview the first rows of the result
head(combined_data)

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,lfsr,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,1023774,1023775,A,G,chr1:1023775:G:A,ENSG00000188157,BM_10_MSBB_eQTL;BM_22_MSBB_eQTL;BM_36_MSBB_eQTL;BM_44_MSBB_eQTL,1,1,1,0.97210539,0.104506519939872;0.125894884716484;0.0900918435608349;0.11841837243765,3.79924606022026e-12;3.84197473318569e-11;7.66144505134629e-11;1.4305998342331e-16,MSBB,Multi,All,MSBB_Multi_All
1,1185633,1185634,A,G,chr1:1185634:G:A,ENSG00000186891,BM_10_MSBB_eQTL;BM_22_MSBB_eQTL;BM_36_MSBB_eQTL;BM_44_MSBB_eQTL,1,1,1,0.14623895,0.634539886004572;0.51361551629155;0.591478449188189;0.582230552873455,6.89576902647313e-13;5.66680716108471e-09;2.71034501950873e-12;9.65056341704484e-13,MSBB,Multi,All,MSBB_Multi_All
1,1185960,1185961,T,C,chr1:1185961:C:T,ENSG00000186891,BM_10_MSBB_eQTL;BM_22_MSBB_eQTL;BM_36_MSBB_eQTL;BM_44_MSBB_eQTL,1,1,1,0.07711689,0.634115468678271;0.516653150656981;0.540331824203107;0.565234066186421,6.89576902647313e-13;5.66680716108471e-09;2.71034501950873e-12;9.65056341704484e-13,MSBB,Multi,All,MSBB_Multi_All
1,1186413,1186414,A,G,chr1:1186414:G:A,ENSG00000186891,BM_10_MSBB_eQTL;BM_22_MSBB_eQTL;BM_36_MSBB_eQTL;BM_44_MSBB_eQTL,1,1,1,0.26701266,0.641661665800936;0.523696513943397;0.580825918223139;0.582827363955203,6.89576902647313e-13;5.66680716108471e-09;2.71034501950873e-12;9.65056341704484e-13,MSBB,Multi,All,MSBB_Multi_All
1,1186643,1186644,C,T,chr1:1186644:T:C,ENSG00000186891,BM_10_MSBB_eQTL;BM_22_MSBB_eQTL;BM_36_MSBB_eQTL;BM_44_MSBB_eQTL,1,1,0,0.0211809,0.635804267352963;0.540146604599051;0.57339889758486;0.581141794657505,6.89576902647313e-13;5.66680716108471e-09;2.71034501950873e-12;9.65056341704484e-13,MSBB,Multi,All,MSBB_Multi_All
1,1186938,1186939,T,C,chr1:1186939:C:T,ENSG00000186891,BM_10_MSBB_eQTL;BM_22_MSBB_eQTL;BM_36_MSBB_eQTL;BM_44_MSBB_eQTL,1,1,0,0.0211809,0.635804267352963;0.540146604599051;0.57339889758486;0.581141794657505,6.89576902647313e-13;5.66680716108471e-09;2.71034501950873e-12;9.65056341704484e-13,MSBB,Multi,All,MSBB_Multi_All


In [55]:
# TNIP1 (ENSG00000145901): fine-mapped variants for this gene
filter(combined_data, gene_ID == "ENSG00000145901") # chr5:151052827:C:T

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,lfsr,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
5,151052826,151052827,T,C,chr5:151052827:C:T,ENSG00000145901,Ast_DeJager_eQTL;DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;Inh_DeJager_eQTL;Oli_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.9846132,-0.0719409643006853;-0.060144722948293;-0.186312328076544;-0.191935764979419;-0.0524778708350972;-0.0592596025427258,0.000400460767242577;8.51838224941688e-06;6.50585284034134e-06;0.000253468306175598;0.00195752000113241;3.29297696816989e-05,ROSMAP,Multi,All,ROSMAP_Multi_All
5,151080672,151080673,T,C,chr5:151080673:C:T,ENSG00000145901,Ast_DeJager_eQTL;DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;Inh_DeJager_eQTL;PCC_DeJager_eQTL,1,1,1,0.9999555,0.535955820593792;0.261613174664723;0.393039478194287;0.568659712253046;0.522387015680065,0.000274807970794486;6.75656185923516e-08;2.55275134369782e-06;6.86968257750878e-05;1.37499152214527e-09,ROSMAP,Multi,All,ROSMAP_Multi_All


In [71]:
# DDX17 (ENSG00000100201): fine-mapped variants for this gene
filter(combined_data, gene_ID == "ENSG00000100201")
# chr22:38502046:T:C, chr22:38510427:C:T,                       chr22:38516766:A:G

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,lfsr,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
22,38481455,38481456,G,T,chr22:38481456:T:G,ENSG00000100201,,0,0,0,0.030196324,,,MSBB,Multi,All,MSBB_Multi_All
22,38484091,38484092,G,GA,chr22:38484092:GA:G,ENSG00000100201,,0,0,0,0.032052131,,,MSBB,Multi,All,MSBB_Multi_All
22,38502542,38502543,C,CTTAT,chr22:38502543:CTTAT:C,ENSG00000100201,,0,0,0,0.032052131,,,MSBB,Multi,All,MSBB_Multi_All
22,38505088,38505089,G,A,chr22:38505089:A:G,ENSG00000100201,,0,0,0,0.047954804,,,MSBB,Multi,All,MSBB_Multi_All
22,38514804,38514805,T,C,chr22:38514805:C:T,ENSG00000100201,,0,0,0,0.032052131,,,MSBB,Multi,All,MSBB_Multi_All
22,38561477,38561478,A,G,chr22:38561478:G:A,ENSG00000100201,,0,0,0,0.031546518,,,MSBB,Multi,All,MSBB_Multi_All
22,38478390,38478391,T,C,chr22:38478391:C:T,ENSG00000100201,AC_DeJager_eQTL;Ast_DeJager_eQTL;DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;Inh_DeJager_eQTL;Mic_DeJager_eQTL;OPC_DeJager_eQTL;Oli_DeJager_eQTL;PCC_DeJager_eQTL;monocyte_ROSMAP_eQTL,1,1,0,0.146524353,-0.0695762656333377;-0.129169880794159;-0.0892063274256416;-0.0858662049762716;-0.116830240577441;-0.0933731511098384;-0.139447819581871;-0.155249513858823;-0.12084478202185;-0.0891189512042862,0.000686806830438847;1.51364695429336e-05;1.53177902105801e-09;0.000151061280846832;0.000655007033428711;0.00596017449236874;0.000115210185037117;1.24000015613546e-06;1.39610639379668e-05;8.78519527249217e-08,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38481769,38481770,A,AC,chr22:38481770:AC:A,ENSG00000100201,AC_DeJager_eQTL;Ast_DeJager_eQTL;DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;Inh_DeJager_eQTL;Mic_DeJager_eQTL;OPC_DeJager_eQTL;Oli_DeJager_eQTL;PCC_DeJager_eQTL;monocyte_ROSMAP_eQTL,1,0,0,0.005513079,-0.066748093821715;-0.12277196846352;-0.0883769029797236;-0.0823106928473388;-0.110288731443328;-0.0924577347320876;-0.129728939020501;-0.148599259265835;-0.117724875619324;-0.0871974550314546,0.000686806830438847;1.51364695429336e-05;1.53177902105801e-09;0.000151061280846832;0.000655007033428711;0.00596017449236874;0.000115210185037117;1.24000015613546e-06;1.39610639379668e-05;8.78519527249217e-08,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38485448,38485449,C,A,chr22:38485449:A:C,ENSG00000100201,AC_DeJager_eQTL;Ast_DeJager_eQTL;DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;Inh_DeJager_eQTL;Mic_DeJager_eQTL;OPC_DeJager_eQTL;Oli_DeJager_eQTL;PCC_DeJager_eQTL;monocyte_ROSMAP_eQTL,1,1,1,0.527397622,-0.0730292980739849;-0.126358133858273;-0.0897435248012195;-0.0863205908094982;-0.117766022078487;-0.102255043336284;-0.139609920349515;-0.156375679178378;-0.125376892536246;-0.0906529625927553,0.000686806830438847;1.51364695429336e-05;1.53177902105801e-09;0.000151061280846832;0.000655007033428711;0.00596017449236874;0.000115210185037117;1.24000015613546e-06;1.39610639379668e-05;8.78519527249217e-08,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38502045,38502046,C,T,chr22:38502046:T:C,ENSG00000100201,AC_DeJager_eQTL;Ast_DeJager_eQTL;DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;Inh_DeJager_eQTL;Mic_DeJager_eQTL;OPC_DeJager_eQTL;Oli_DeJager_eQTL;PCC_DeJager_eQTL;monocyte_ROSMAP_eQTL,1,1,0,0.166679975,-0.069623552338573;-0.129368907297536;-0.0890368562733994;-0.086626536823949;-0.114278926289242;-0.0946453114961523;-0.141155694684732;-0.154138945400148;-0.118204774752326;-0.090468936103128,0.000686806830438847;1.51364695429336e-05;1.53177902105801e-09;0.000151061280846832;0.000655007033428711;0.00596017449236874;0.000115210185037117;1.24000015613546e-06;1.39610639379668e-05;8.78519527249217e-08,ROSMAP,Multi,All,ROSMAP_Multi_All


In [85]:
# KATNA1: look up a single variant by ID rather than the whole gene
# Gene-level alternative, kept for reference:
#combined_data |> filter(gene_ID == "ENSG00000186625")
filter(combined_data, variant_ID == 'chr6:149630078:G:A')

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,lfsr,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
6,149630077,149630078,A,G,chr6:149630078:G:A,ENSG00000186625,Ast_DeJager_eQTL;Exc_DeJager_eQTL;Inh_DeJager_eQTL;PCC_DeJager_eQTL,1,0,0,0.002687505,0.0841374437586753;0.23711119372477;0.202241886878106;0.053816021703742,3.94541657049243e-05;3.32880237815612e-09;2.54264187791845e-07;0.000113610461504865,ROSMAP,Multi,All,ROSMAP_Multi_All


In [57]:
# CBY1 (ENSG00000100211): fine-mapped variants for this gene
filter(combined_data, gene_ID == "ENSG00000100211")


#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,lfsr,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
22,38472215,38472216,A,G,chr22:38472216:G:A,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,0,0,0.008618324,0.294800461772055;0.139115776920473;0.269570708620722,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38475416,38475417,A,G,chr22:38475417:G:A,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.024055994,0.298674384262293;0.141323081008267;0.278291911441999,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38475885,38475886,A,G,chr22:38475886:G:A,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.039988817,0.295690469929494;0.141421445054892;0.276269347752163,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38481750,38481751,A,C,chr22:38481751:C:A,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.054983148,0.310556354981724;0.148462738201577;0.293033953100927,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38481912,38481913,C,T,chr22:38481913:T:C,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.040327569,0.303710603859182;0.143527096182703;0.289690641406974,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38482752,38482753,A,C,chr22:38482753:C:A,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.028034412,0.300144846216864;0.143851974069019;0.280132561747914,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38484681,38484682,T,C,chr22:38484682:C:T,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.054983148,0.310556354981724;0.148462738201577;0.293033953100927,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38496639,38496640,G,A,chr22:38496640:A:G,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.017592895,0.301673823162813;0.142033946468302;0.278342603858541,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38497195,38497196,T,C,chr22:38497196:C:T,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.017592895,0.301673823162813;0.142033946468302;0.278342603858541,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38498943,38498944,G,A,chr22:38498944:A:G,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.017592895,0.301673823162813;0.142033946468302;0.278342603858541,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All


In [110]:
# SUN2 (ENSG00000100242): fine-mapped variants for this gene
filter(combined_data, gene_ID == "ENSG00000100242")

# All rows, across genes, for the variant chr22:38578149:C:A
filter(combined_data, variant_ID == "chr22:38578149:C:A")

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,lfsr,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
22,38574433,38574434,A,G,chr22:38574434:G:A,ENSG00000100242,DLPFC_Bennett_pQTL;Exc_DeJager_eQTL;Mic_DeJager_eQTL;Oli_DeJager_eQTL,0,0,2,0.017336995,-0.124288008537991;0.082372222468656;0.246152877318209;-0.136193948986888,0.00135229110593613;0.0029942132992663;0.000554221564510136;7.70859473994561e-06,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38581884,38581885,C,G,chr22:38581885:G:C,ENSG00000100242,DLPFC_Bennett_pQTL;Exc_DeJager_eQTL;Mic_DeJager_eQTL;Oli_DeJager_eQTL,0,0,2,0.008532235,-0.121881060839932;0.0802308919004644;0.231821388927877;-0.140395715085595,0.00135229110593613;0.0029942132992663;0.000554221564510136;7.70859473994561e-06,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38585004,38585005,T,C,chr22:38585005:C:T,ENSG00000100242,DLPFC_Bennett_pQTL;Exc_DeJager_eQTL;Mic_DeJager_eQTL;Oli_DeJager_eQTL,0,0,2,0.012286915,-0.123271821309376;0.0777911981091653;0.235760809234259;-0.138786032680385,0.00135229110593613;0.0029942132992663;0.000554221564510136;7.70859473994561e-06,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38586436,38586437,A,T,chr22:38586437:T:A,ENSG00000100242,DLPFC_Bennett_pQTL;Exc_DeJager_eQTL;Mic_DeJager_eQTL;Oli_DeJager_eQTL,0,0,2,0.007103724,-0.121764738806507;0.0815346605618554;0.231165322355385;-0.135830356460141,0.00135229110593613;0.0029942132992663;0.000554221564510136;7.70859473994561e-06,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38589059,38589060,C,T,chr22:38589060:T:C,ENSG00000100242,DLPFC_Bennett_pQTL;Exc_DeJager_eQTL;Mic_DeJager_eQTL;Oli_DeJager_eQTL,0,0,2,0.012670327,-0.13324573651497;0.0704537786080963;0.191048926626627;-0.148336739230042,0.00135229110593613;0.0029942132992663;0.000554221564510136;7.70859473994561e-06,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38589812,38589813,C,A,chr22:38589813:A:C,ENSG00000100242,DLPFC_Bennett_pQTL;Exc_DeJager_eQTL;Mic_DeJager_eQTL;Oli_DeJager_eQTL,0,0,2,0.006905779,-0.121914732311112;0.0813897152597761;0.230685056509257;-0.136477169463327,0.00135229110593613;0.0029942132992663;0.000554221564510136;7.70859473994561e-06,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38593807,38593808,G,A,chr22:38593808:A:G,ENSG00000100242,DLPFC_Bennett_pQTL;Exc_DeJager_eQTL;Mic_DeJager_eQTL;Oli_DeJager_eQTL,0,0,2,0.011734257,-0.129388970596724;0.0704665102058869;0.184868760059273;-0.152029650903433,0.00135229110593613;0.0029942132992663;0.000554221564510136;7.70859473994561e-06,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38597364,38597365,T,C,chr22:38597365:C:T,ENSG00000100242,DLPFC_Bennett_pQTL;Exc_DeJager_eQTL;Mic_DeJager_eQTL;Oli_DeJager_eQTL,0,0,2,0.017787104,-0.116793567456719;0.0822042540542496;0.234391668107533;-0.14220571381934,0.00135229110593613;0.0029942132992663;0.000554221564510136;7.70859473994561e-06,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38604391,38604392,A,G,chr22:38604392:G:A,ENSG00000100242,DLPFC_Bennett_pQTL;Exc_DeJager_eQTL;Mic_DeJager_eQTL;Oli_DeJager_eQTL,0,0,2,0.008638212,-0.124588838960029;0.0814675791581327;0.230804786787413;-0.136178360492473,0.00135229110593613;0.0029942132992663;0.000554221564510136;7.70859473994561e-06,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38617516,38617517,A,G,chr22:38617517:G:A,ENSG00000100242,DLPFC_Bennett_pQTL;Exc_DeJager_eQTL;Mic_DeJager_eQTL;Oli_DeJager_eQTL,0,0,2,0.006877635,-0.121730347697711;0.0817175369196799;0.231905274366772;-0.135809195520042,0.00135229110593613;0.0029942132992663;0.000554221564510136;7.70859473994561e-06,ROSMAP,Multi,All,ROSMAP_Multi_All


#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,lfsr,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
22,38578148,38578149,A,C,chr22:38578149:C:A,ENSG00000184949,BM_10_MSBB_eQTL;BM_22_MSBB_eQTL;BM_36_MSBB_eQTL;BM_44_MSBB_eQTL,1,1,0,0.030644223,0.243091908462494;0.276177905852097;0.26745921369733;0.242943367507419,9.00171739809861e-19;6.49692345840193e-18;1.98496875324964e-15;7.90689759701292e-19,MSBB,Multi,All,MSBB_Multi_All
22,38578148,38578149,A,C,chr22:38578149:C:A,ENSG00000100211,DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;Inh_DeJager_eQTL;PCC_DeJager_eQTL,1,1,0,0.026494754,-0.0598052754673848;-0.340639957480815;-0.302997161916641;-0.0618911739769595,2.22361549874307e-09;1.9300595179007e-17;4.09093556746394e-09;5.33855436621758e-05,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38578148,38578149,A,C,chr22:38578149:C:A,ENSG00000100216,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,0,0,0,0.002060264,-0.0660313729200387;-0.0621164338078944;-0.0542885215102135,2.55597480967741e-07;9.12871100306257e-06;5.34005069892543e-06,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38578148,38578149,A,C,chr22:38578149:C:A,ENSG00000184949,AC_DeJager_eQTL;Ast_DeJager_eQTL;DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;Inh_DeJager_eQTL;PCC_DeJager_eQTL,1,1,1,0.099100242,0.256773923990074;0.230630481568981;0.299233353949708;0.364966093933308;0.410860470601441;0.249092016807923,1.07973528503614e-35;2.10021683533061e-10;1.07973528476926e-35;4.23245611751239e-34;5.41291059735074e-20;4.48665906674278e-32,ROSMAP,Multi,All,ROSMAP_Multi_All


In [None]:
#  twas
# NO PCMT1, 
## TNIP1 has a risk effect across contexts; CBY1 has a risk effect across contexts, except for ROSMAP_PCC_sQTL_chr17:46038686:46039701:clu_111747_-:PR:ENSG00000120071
## DDX17 has a protective effect across contexts

## xQTL

In [3]:
# Manifest of top-loci result files. The Method filter below is left disabled,
# so the full manifest is kept.
meta_file <- fread("project/image_QTL/DNE_cb_explained_by_xQTL/xQTL_top_loci.tsv")
#meta_file = meta_file |> filter(Method != "AD_GWAS_finemapping" & Method != "ColocBoost" & Method != "twas"& Method != "ctwas")

In [46]:
#' Read one top-loci table and tag every row with its provenance.
#'
#' Every column is parsed as character so that files whose columns would be
#' guessed as different types can still be row-bound afterwards.
#'
#' @param path Path to a tab-separated file readable by `read_tsv()`.
#' @param cohort Value stored in the `Cohort` column.
#' @param data_type Value stored in the `Data_Type` column.
#' @param modality Value stored in the `Modality` column.
#' @return The table read from `path` with `Cohort`, `Data_Type`, `Modality`
#'   and `Cohort_Data_Type_Modality` columns appended.
read_topolci_file <- function(path, cohort, data_type, modality) {
  data <- read_tsv(path, show_col_types = FALSE, col_types = cols(.default = "c"))

  # Add identifier columns
  data$Cohort <- cohort
  data$Data_Type <- data_type
  data$Modality <- modality
  data$Cohort_Data_Type_Modality <- paste(cohort, data_type, modality, sep = "_")

  data
}

# Read every file listed in meta_file and stack the tables with bind_rows(),
# which fills columns absent from some files with NA. Iterating over the
# manifest columns with pmap() gives an empty result for an empty manifest,
# whereas 1:nrow(meta_file) would iterate over c(1, 0).
combined_data <- pmap(
  list(
    path = meta_file$Path,
    cohort = meta_file$Cohort,
    data_type = meta_file$Data_Type,
    modality = meta_file$Modality
  ),
  read_topolci_file
) |>
  bind_rows()

# Convert selected columns back to numeric (adjust the list as needed).
# Values that are not a single number (e.g. semicolon-separated lists) become
# NA; the loss is reported per column instead of through the generic
# "NAs introduced by coercion" warning.
numeric_cols <- c("conditional_effect", "PIP", "chr", "start", "end")
for (col in intersect(numeric_cols, names(combined_data))) {
  raw_values <- combined_data[[col]]
  parsed_values <- suppressWarnings(as.numeric(raw_values))
  n_lost <- sum(is.na(parsed_values) & !is.na(raw_values))
  if (n_lost > 0) {
    warning(
      sprintf(
        "Column '%s': %d value(s) could not be parsed as numeric and became NA",
        col, n_lost
      ),
      call. = FALSE
    )
  }
  combined_data[[col]] <- parsed_values
}

# Preview the result
head(combined_data)
dim(combined_data)

"NAs introduced by coercion"
"NAs introduced by coercion"


#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,...,grid_resolution,cs_id,cs_root,grid_positions,grid_effects,epi_mark_positions,epi_mark_names,epi_mark_effects,chr,grid_position
<chr>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,...,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<chr>
1,903284,903285,C,T,chr1:903285:T:C,ENSG00000235169,Knight_eQTL_brain_ENSG00000235169,0,0,...,,,,,,,,,,
1,903351,903352,A,G,chr1:903352:G:A,ENSG00000235169,Knight_eQTL_brain_ENSG00000235169,0,0,...,,,,,,,,,,
1,903509,903510,G,A,chr1:903510:A:G,ENSG00000235169,Knight_eQTL_brain_ENSG00000235169,0,0,...,,,,,,,,,,
1,903535,903536,T,A,chr1:903536:A:T,ENSG00000235169,Knight_eQTL_brain_ENSG00000235169,0,0,...,,,,,,,,,,
1,904080,904081,C,T,chr1:904081:T:C,ENSG00000235169,Knight_eQTL_brain_ENSG00000235169,0,0,...,,,,,,,,,,
1,904114,904115,T,G,chr1:904115:G:T,ENSG00000235169,Knight_eQTL_brain_ENSG00000235169,0,0,...,,,,,,,,,,


## trans signals for interested genes

In [58]:
# Manifest of top-loci result files, restricted to trans fine-mapping results
meta_file <- fread("project/image_QTL/DNE_cb_explained_by_xQTL/xQTL_top_loci.tsv") |>
  filter(Method == "trans_finemapping")

In [59]:
# Requires meta_file (the manifest) to be loaded, with Path, Cohort, Data_Type
# and Modality columns.

# Reader for a single top-loci table: column types are guessed by read_tsv()
# and four provenance columns are appended.
read_topolci_file <- function(path, cohort, data_type, modality) {
  loci <- read_tsv(path, show_col_types = FALSE)

  # Tag every row with where it came from
  loci$Cohort <- cohort
  loci$Data_Type <- data_type
  loci$Modality <- modality
  loci$Cohort_Data_Type_Modality <- paste(cohort, data_type, modality, sep = "_")

  loci
}

# One reader call per manifest row; the per-file tables are stacked row-wise
combined_data <- bind_rows(
  pmap(
    list(
      path = meta_file$Path,
      cohort = meta_file$Cohort,
      data_type = meta_file$Data_Type,
      modality = meta_file$Modality
    ),
    read_topolci_file
  )
)

# Optional export of the combined table (disabled)
#fwrite(combined_data, "combined_top_loci_data.tsv")

# Show the first rows
head(combined_data)

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.95_purity0.5,cs_coverage_0.7,cs_coverage_0.7_purity0.5,cs_coverage_0.5,cs_coverage_0.5_purity0.5,PIP,conditional_effect,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>
1,83365797,83365798,C,T,chr1:83365798:T:C,chr1_83003483_84743078,Knight_eQTL_ENSG00000267774,0,0,0,0,0,0,0.2824035,0.049906228,KNIGHT,eQTL,Brain,KNIGHT_eQTL_Brain
1,83396454,83396455,G,T,chr1:83396455:T:G,chr1_83003483_84743078,Knight_eQTL_ENSG00000267774,0,0,0,0,0,0,0.2749731,-0.005958503,KNIGHT,eQTL,Brain,KNIGHT_eQTL_Brain
1,83463655,83463656,G,A,chr1:83463656:A:G,chr1_83003483_84743078,Knight_eQTL_ENSG00000267774,0,0,0,0,0,0,0.2758034,-0.015144148,KNIGHT,eQTL,Brain,KNIGHT_eQTL_Brain
1,83542389,83542390,A,G,chr1:83542390:G:A,chr1_83003483_84743078,Knight_eQTL_ENSG00000267774,0,0,0,0,0,0,0.4817335,0.217448356,KNIGHT,eQTL,Brain,KNIGHT_eQTL_Brain
1,83589029,83589030,GT,G,chr1:83589030:G:GT,chr1_83003483_84743078,Knight_eQTL_ENSG00000267774,0,0,0,0,0,0,0.2705304,0.006084825,KNIGHT,eQTL,Brain,KNIGHT_eQTL_Brain
1,83617072,83617073,A,C,chr1:83617073:C:A,chr1_83003483_84743078,Knight_eQTL_ENSG00000267774,0,0,0,0,0,0,0.3083257,-0.040452104,KNIGHT,eQTL,Brain,KNIGHT_eQTL_Brain


In [152]:
#combined_data = combined_data |> filter(cs_coverage_0.95 > 0)

In [64]:
# Rows whose variant_ID contains "chr22:38" (ENSG00000100201, DDX17)
combined_data |>
  filter(str_detect(variant_ID, "chr22:38"))

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.95_purity0.5,cs_coverage_0.7,cs_coverage_0.7_purity0.5,cs_coverage_0.5,cs_coverage_0.5_purity0.5,PIP,conditional_effect,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>


### Aging_new_DNE

In [163]:
# Table size, then the number of distinct variants in aging_DNE
dim(aging_DNE)
length(unique(aging_DNE[["variant_ID"]]))

In [164]:
# Keep only aging_DNE rows whose variant also appears in combined_data
aging_DNE_overlap_xQTL <- inner_join(aging_DNE, combined_data, by = "variant_ID")

In [165]:
# Size of the overlap table and number of distinct overlapping variants
dim(aging_DNE_overlap_xQTL)
length(unique(aging_DNE_overlap_xQTL[["variant_ID"]]))


In [166]:
# Percentage computed from hand-entered counts (0 out of 1102)
prop <- (0 / 1102) * 100
prop

### AD_DNE

In [167]:
# Table size, then the number of distinct variants in AD_DNE
dim(AD_DNE)
length(unique(AD_DNE[["variant_ID"]]))

In [168]:
# Keep only AD_DNE rows whose variant also appears in combined_data
AD_DNE_overlap_xQTL <- inner_join(AD_DNE, combined_data, by = "variant_ID")

In [169]:
# Size of the overlap table and number of distinct overlapping variants
dim(AD_DNE_overlap_xQTL)
length(unique(AD_DNE_overlap_xQTL[["variant_ID"]]))


In [171]:
# Percentage computed from hand-entered counts (0 out of 799)
prop <- (0 / 799) * 100
prop

### PD_DNE

In [172]:
# Table size, then the number of distinct variants in PD_DNE
dim(PD_DNE)
length(unique(PD_DNE[["variant_ID"]]))

In [173]:
# Keep only PD_DNE rows whose variant also appears in combined_data
PD_DNE_overlap_xQTL <- inner_join(PD_DNE, combined_data, by = "variant_ID")

In [174]:
# Size of the overlap table and number of distinct overlapping variants
dim(PD_DNE_overlap_xQTL)
length(unique(PD_DNE_overlap_xQTL[["variant_ID"]]))


In [175]:
# Percentage computed from hand-entered counts (0 out of 152)
prop <- (0 / 152) * 100
prop

## Multi_context


In [4]:
# Manifest of top-loci result files, restricted to multi-context fine-mapping
meta_file <- fread("project/image_QTL/DNE_cb_explained_by_xQTL/xQTL_top_loci.tsv") |>
  filter(Method == "multi_context_finemapping")

In [14]:
# Requires meta_file (the manifest) to be loaded, with Path, Cohort, Data_Type
# and Modality columns.

# Reader for a single top-loci table: column types are guessed by read_tsv()
# and four provenance columns are appended.
read_topolci_file <- function(path, cohort, data_type, modality) {
  top_loci <- read_tsv(path, show_col_types = FALSE)

  # Tag every row with where it came from
  top_loci$Cohort <- cohort
  top_loci$Data_Type <- data_type
  top_loci$Modality <- modality
  top_loci$Cohort_Data_Type_Modality <- paste(cohort, data_type, modality, sep = "_")

  top_loci
}

# One reader call per manifest row; the per-file tables are stacked row-wise
combined_data <- bind_rows(
  pmap(
    list(
      path = meta_file$Path,
      cohort = meta_file$Cohort,
      data_type = meta_file$Data_Type,
      modality = meta_file$Modality
    ),
    read_topolci_file
  )
)

# Optional export of the combined table (disabled)
#fwrite(combined_data, "combined_top_loci_data.tsv")

# Show the first rows
head(combined_data)

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,lfsr,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,1023774,1023775,A,G,chr1:1023775:G:A,ENSG00000188157,BM_10_MSBB_eQTL;BM_22_MSBB_eQTL;BM_36_MSBB_eQTL;BM_44_MSBB_eQTL,1,1,1,0.97210539,0.104506519939872;0.125894884716484;0.0900918435608349;0.11841837243765,3.79924606022026e-12;3.84197473318569e-11;7.66144505134629e-11;1.4305998342331e-16,MSBB,Multi,All,MSBB_Multi_All
1,1185633,1185634,A,G,chr1:1185634:G:A,ENSG00000186891,BM_10_MSBB_eQTL;BM_22_MSBB_eQTL;BM_36_MSBB_eQTL;BM_44_MSBB_eQTL,1,1,1,0.14623895,0.634539886004572;0.51361551629155;0.591478449188189;0.582230552873455,6.89576902647313e-13;5.66680716108471e-09;2.71034501950873e-12;9.65056341704484e-13,MSBB,Multi,All,MSBB_Multi_All
1,1185960,1185961,T,C,chr1:1185961:C:T,ENSG00000186891,BM_10_MSBB_eQTL;BM_22_MSBB_eQTL;BM_36_MSBB_eQTL;BM_44_MSBB_eQTL,1,1,1,0.07711689,0.634115468678271;0.516653150656981;0.540331824203107;0.565234066186421,6.89576902647313e-13;5.66680716108471e-09;2.71034501950873e-12;9.65056341704484e-13,MSBB,Multi,All,MSBB_Multi_All
1,1186413,1186414,A,G,chr1:1186414:G:A,ENSG00000186891,BM_10_MSBB_eQTL;BM_22_MSBB_eQTL;BM_36_MSBB_eQTL;BM_44_MSBB_eQTL,1,1,1,0.26701266,0.641661665800936;0.523696513943397;0.580825918223139;0.582827363955203,6.89576902647313e-13;5.66680716108471e-09;2.71034501950873e-12;9.65056341704484e-13,MSBB,Multi,All,MSBB_Multi_All
1,1186643,1186644,C,T,chr1:1186644:T:C,ENSG00000186891,BM_10_MSBB_eQTL;BM_22_MSBB_eQTL;BM_36_MSBB_eQTL;BM_44_MSBB_eQTL,1,1,0,0.0211809,0.635804267352963;0.540146604599051;0.57339889758486;0.581141794657505,6.89576902647313e-13;5.66680716108471e-09;2.71034501950873e-12;9.65056341704484e-13,MSBB,Multi,All,MSBB_Multi_All
1,1186938,1186939,T,C,chr1:1186939:C:T,ENSG00000186891,BM_10_MSBB_eQTL;BM_22_MSBB_eQTL;BM_36_MSBB_eQTL;BM_44_MSBB_eQTL,1,1,0,0.0211809,0.635804267352963;0.540146604599051;0.57339889758486;0.581141794657505,6.89576902647313e-13;5.66680716108471e-09;2.71034501950873e-12;9.65056341704484e-13,MSBB,Multi,All,MSBB_Multi_All


In [6]:
# Keep only rows with a positive cs_coverage_0.95 value
combined_data <- filter(combined_data, cs_coverage_0.95 > 0)

In [15]:
# Look up a single variant by its exact ID
filter(combined_data, variant_ID == "chr22:38485449:A:C")

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,lfsr,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
22,38485448,38485449,C,A,chr22:38485449:A:C,ENSG00000100201,AC_DeJager_eQTL;Ast_DeJager_eQTL;DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;Inh_DeJager_eQTL;Mic_DeJager_eQTL;OPC_DeJager_eQTL;Oli_DeJager_eQTL;PCC_DeJager_eQTL;monocyte_ROSMAP_eQTL,1,1,1,0.5273976,-0.0730292980739849;-0.126358133858273;-0.0897435248012195;-0.0863205908094982;-0.117766022078487;-0.102255043336284;-0.139609920349515;-0.156375679178378;-0.125376892536246;-0.0906529625927553,0.000686806830438847;1.51364695429336e-05;1.53177902105801e-09;0.000151061280846832;0.000655007033428711;0.00596017449236874;0.000115210185037117;1.24000015613546e-06;1.39610639379668e-05;8.78519527249217e-08,ROSMAP,Multi,All,ROSMAP_Multi_All


In [16]:
# DDX17: all rows for gene ENSG00000100201
filter(combined_data, gene_ID == "ENSG00000100201")

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,lfsr,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
22,38481455,38481456,G,T,chr22:38481456:T:G,ENSG00000100201,,0,0,0,0.030196324,,,MSBB,Multi,All,MSBB_Multi_All
22,38484091,38484092,G,GA,chr22:38484092:GA:G,ENSG00000100201,,0,0,0,0.032052131,,,MSBB,Multi,All,MSBB_Multi_All
22,38502542,38502543,C,CTTAT,chr22:38502543:CTTAT:C,ENSG00000100201,,0,0,0,0.032052131,,,MSBB,Multi,All,MSBB_Multi_All
22,38505088,38505089,G,A,chr22:38505089:A:G,ENSG00000100201,,0,0,0,0.047954804,,,MSBB,Multi,All,MSBB_Multi_All
22,38514804,38514805,T,C,chr22:38514805:C:T,ENSG00000100201,,0,0,0,0.032052131,,,MSBB,Multi,All,MSBB_Multi_All
22,38561477,38561478,A,G,chr22:38561478:G:A,ENSG00000100201,,0,0,0,0.031546518,,,MSBB,Multi,All,MSBB_Multi_All
22,38478390,38478391,T,C,chr22:38478391:C:T,ENSG00000100201,AC_DeJager_eQTL;Ast_DeJager_eQTL;DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;Inh_DeJager_eQTL;Mic_DeJager_eQTL;OPC_DeJager_eQTL;Oli_DeJager_eQTL;PCC_DeJager_eQTL;monocyte_ROSMAP_eQTL,1,1,0,0.146524353,-0.0695762656333377;-0.129169880794159;-0.0892063274256416;-0.0858662049762716;-0.116830240577441;-0.0933731511098384;-0.139447819581871;-0.155249513858823;-0.12084478202185;-0.0891189512042862,0.000686806830438847;1.51364695429336e-05;1.53177902105801e-09;0.000151061280846832;0.000655007033428711;0.00596017449236874;0.000115210185037117;1.24000015613546e-06;1.39610639379668e-05;8.78519527249217e-08,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38481769,38481770,A,AC,chr22:38481770:AC:A,ENSG00000100201,AC_DeJager_eQTL;Ast_DeJager_eQTL;DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;Inh_DeJager_eQTL;Mic_DeJager_eQTL;OPC_DeJager_eQTL;Oli_DeJager_eQTL;PCC_DeJager_eQTL;monocyte_ROSMAP_eQTL,1,0,0,0.005513079,-0.066748093821715;-0.12277196846352;-0.0883769029797236;-0.0823106928473388;-0.110288731443328;-0.0924577347320876;-0.129728939020501;-0.148599259265835;-0.117724875619324;-0.0871974550314546,0.000686806830438847;1.51364695429336e-05;1.53177902105801e-09;0.000151061280846832;0.000655007033428711;0.00596017449236874;0.000115210185037117;1.24000015613546e-06;1.39610639379668e-05;8.78519527249217e-08,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38485448,38485449,C,A,chr22:38485449:A:C,ENSG00000100201,AC_DeJager_eQTL;Ast_DeJager_eQTL;DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;Inh_DeJager_eQTL;Mic_DeJager_eQTL;OPC_DeJager_eQTL;Oli_DeJager_eQTL;PCC_DeJager_eQTL;monocyte_ROSMAP_eQTL,1,1,1,0.527397622,-0.0730292980739849;-0.126358133858273;-0.0897435248012195;-0.0863205908094982;-0.117766022078487;-0.102255043336284;-0.139609920349515;-0.156375679178378;-0.125376892536246;-0.0906529625927553,0.000686806830438847;1.51364695429336e-05;1.53177902105801e-09;0.000151061280846832;0.000655007033428711;0.00596017449236874;0.000115210185037117;1.24000015613546e-06;1.39610639379668e-05;8.78519527249217e-08,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38502045,38502046,C,T,chr22:38502046:T:C,ENSG00000100201,AC_DeJager_eQTL;Ast_DeJager_eQTL;DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;Inh_DeJager_eQTL;Mic_DeJager_eQTL;OPC_DeJager_eQTL;Oli_DeJager_eQTL;PCC_DeJager_eQTL;monocyte_ROSMAP_eQTL,1,1,0,0.166679975,-0.069623552338573;-0.129368907297536;-0.0890368562733994;-0.086626536823949;-0.114278926289242;-0.0946453114961523;-0.141155694684732;-0.154138945400148;-0.118204774752326;-0.090468936103128,0.000686806830438847;1.51364695429336e-05;1.53177902105801e-09;0.000151061280846832;0.000655007033428711;0.00596017449236874;0.000115210185037117;1.24000015613546e-06;1.39610639379668e-05;8.78519527249217e-08,ROSMAP,Multi,All,ROSMAP_Multi_All


In [19]:
# Rows whose variant_ID contains "chr22:3868"
combined_data |>
  filter(str_detect(variant_ID, "chr22:3868"))

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,lfsr,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
22,38683407,38683408,G,A,chr22:38683408:A:G,ENSG00000100221,,0,1,0,0.009372821,,,MSBB,Multi,All,MSBB_Multi_All
22,38684072,38684073,C,T,chr22:38684073:T:C,ENSG00000100216,BM_10_MSBB_eQTL;BM_22_MSBB_eQTL;BM_36_MSBB_eQTL;BM_44_MSBB_eQTL,1,0,0,0.04020523,-0.206777193297264;-0.154802032965996;-0.128556429022214;-0.103320278748806,3.1482448201792e-19;3.01332854263142e-11;1.23387311383683e-09;4.19780836096992e-08,MSBB,Multi,All,MSBB_Multi_All
22,38686168,38686169,G,A,chr22:38686169:A:G,ENSG00000100216,BM_10_MSBB_eQTL;BM_22_MSBB_eQTL;BM_36_MSBB_eQTL;BM_44_MSBB_eQTL,1,0,0,0.04020523,-0.206777193297264;-0.154802032965996;-0.128556429022214;-0.103320278748806,3.1482448201792e-19;3.01332854263142e-11;1.23387311383683e-09;4.19780836096992e-08,MSBB,Multi,All,MSBB_Multi_All
22,38683407,38683408,G,A,chr22:38683408:A:G,ENSG00000100216,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,0,0,0,0.004219858,-0.0703537537280521;-0.0657647793387793;-0.0569240387217595,2.55597480967741e-07;9.12871100306257e-06;5.34005069892543e-06,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38683407,38683408,G,A,chr22:38683408:A:G,ENSG00000100221,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;PCC_DeJager_eQTL,0,0,0,0.007090977,-0.0868004212452489;-0.0673088414306691;-0.180673836245387;-0.0609316085658734,1.44074077430459e-05;0.0003386832858892;1.66783268842494e-07;0.00107513334276745,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38683407,38683408,G,A,chr22:38683408:A:G,ENSG00000100242,DLPFC_Bennett_pQTL;Exc_DeJager_eQTL;Mic_DeJager_eQTL;Oli_DeJager_eQTL,0,0,2,0.011938169,-0.121817727403801;0.0786015299088751;0.233509706367562;-0.140647431489793,0.00135229110593613;0.0029942132992663;0.000554221564510136;7.70859473994561e-06,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38683958,38683959,C,T,chr22:38683959:T:C,ENSG00000100216,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,0,0,0,0.005177622,-0.0710656186844363;-0.0663162356228328;-0.0572718942251075,2.55597480967741e-07;9.12871100306257e-06;5.34005069892543e-06,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38683958,38683959,C,T,chr22:38683959:T:C,ENSG00000100221,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;PCC_DeJager_eQTL,0,0,0,0.006528366,-0.0869880086063873;-0.0680468260257728;-0.182338821082875;-0.0615570188120538,1.44074077430459e-05;0.0003386832858892;1.66783268842494e-07;0.00107513334276745,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38683958,38683959,C,T,chr22:38683959:T:C,ENSG00000100242,DLPFC_Bennett_pQTL;Exc_DeJager_eQTL;Mic_DeJager_eQTL;Oli_DeJager_eQTL,0,0,2,0.008423569,-0.11943597804369;0.0824655451723734;0.243253590702614;-0.13746969096718,0.00135229110593613;0.0029942132992663;0.000554221564510136;7.70859473994561e-06,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38684072,38684073,C,T,chr22:38684073:T:C,ENSG00000100211,DLPFC_DeJager_eQTL;Exc_DeJager_eQTL;Inh_DeJager_eQTL;PCC_DeJager_eQTL,1,1,0,0.027937415,-0.0587317629802145;-0.347466888448967;-0.309604578707479;-0.0599446272604493,2.22361549874307e-09;1.9300595179007e-17;4.09093556746394e-09;5.33855436621758e-05,ROSMAP,Multi,All,ROSMAP_Multi_All


In [12]:
# CBY1: all rows for gene ENSG00000100211
filter(combined_data, gene_ID == "ENSG00000100211")

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,lfsr,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
22,38472215,38472216,A,G,chr22:38472216:G:A,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,0,0,0.008618324,0.294800461772055;0.139115776920473;0.269570708620722,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38475416,38475417,A,G,chr22:38475417:G:A,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.024055994,0.298674384262293;0.141323081008267;0.278291911441999,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38475885,38475886,A,G,chr22:38475886:G:A,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.039988817,0.295690469929494;0.141421445054892;0.276269347752163,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38481750,38481751,A,C,chr22:38481751:C:A,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.054983148,0.310556354981724;0.148462738201577;0.293033953100927,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38481912,38481913,C,T,chr22:38481913:T:C,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.040327569,0.303710603859182;0.143527096182703;0.289690641406974,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38482752,38482753,A,C,chr22:38482753:C:A,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.028034412,0.300144846216864;0.143851974069019;0.280132561747914,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38484681,38484682,T,C,chr22:38484682:C:T,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.054983148,0.310556354981724;0.148462738201577;0.293033953100927,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38496639,38496640,G,A,chr22:38496640:A:G,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.017592895,0.301673823162813;0.142033946468302;0.278342603858541,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38497195,38497196,T,C,chr22:38497196:C:T,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.017592895,0.301673823162813;0.142033946468302;0.278342603858541,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All
22,38498943,38498944,G,A,chr22:38498944:A:G,ENSG00000100211,AC_DeJager_eQTL;DLPFC_DeJager_eQTL;PCC_DeJager_eQTL,2,2,2,0.017592895,0.301673823162813;0.142033946468302;0.278342603858541,0.000138597917265121;0.000125396672158548;0.000105902277308164,ROSMAP,Multi,All,ROSMAP_Multi_All


## Multi_gene

In [98]:
# Manifest of top-loci result files, restricted to multi-gene fine-mapping
meta_file <- fread("project/image_QTL/DNE_cb_explained_by_xQTL/xQTL_top_loci.tsv") |>
  filter(Method == "multi_gene_finemapping")

In [83]:
# Requires meta_file (the manifest) to be loaded, with Path, Cohort, Data_Type
# and Modality columns.

# Reader for a single top-loci table: column types are guessed by read_tsv()
# and four provenance columns are appended.
read_topolci_file <- function(path, cohort, data_type, modality) {
  loci_tbl <- read_tsv(path, show_col_types = FALSE)

  # Tag every row with where it came from
  loci_tbl$Cohort <- cohort
  loci_tbl$Data_Type <- data_type
  loci_tbl$Modality <- modality
  loci_tbl$Cohort_Data_Type_Modality <- paste(cohort, data_type, modality, sep = "_")

  loci_tbl
}

# One reader call per manifest row; the per-file tables are stacked row-wise
combined_data <- bind_rows(
  pmap(
    list(
      path = meta_file$Path,
      cohort = meta_file$Cohort,
      data_type = meta_file$Data_Type,
      modality = meta_file$Modality
    ),
    read_topolci_file
  )
)

# Optional export of the combined table (disabled)
#fwrite(combined_data, "combined_top_loci_data.tsv")

# Show the first rows
head(combined_data)

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.95_purity0.5,PIP,conditional_effect,lfsr,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,103489756,103489757,C,T,chr1:103489757:T:C,chr1_100323336_104758690,ENSG00000240038;ENSG00000243480,1,1,0.008932857,-0.253080105637886;0.317505275142761,7.12792180057368e-06;0.00408151618101174,MSBB,eQTL,BM_10,MSBB_eQTL_BM_10
1,103501178,103501179,G,C,chr1:103501179:C:G,chr1_100323336_104758690,ENSG00000240038;ENSG00000243480,1,1,0.041167173,-0.264835739867384;0.33816448539235,7.12792180057368e-06;0.00408151618101174,MSBB,eQTL,BM_10,MSBB_eQTL_BM_10
1,103506058,103506059,C,T,chr1:103506059:T:C,chr1_100323336_104758690,ENSG00000240038;ENSG00000243480,1,1,0.041167173,-0.264835739867384;0.33816448539235,7.12792180057368e-06;0.00408151618101174,MSBB,eQTL,BM_10,MSBB_eQTL_BM_10
1,103509844,103509845,C,G,chr1:103509845:G:C,chr1_100323336_104758690,ENSG00000240038;ENSG00000243480,1,1,0.073022589,-0.271300529039031;0.329117985984756,7.12792180057368e-06;0.00408151618101174,MSBB,eQTL,BM_10,MSBB_eQTL_BM_10
1,103522240,103522241,C,T,chr1:103522241:T:C,chr1_100323336_104758690,ENSG00000240038;ENSG00000243480,1,1,0.041167173,-0.264835739867384;0.33816448539235,7.12792180057368e-06;0.00408151618101174,MSBB,eQTL,BM_10,MSBB_eQTL_BM_10
1,103522346,103522347,T,C,chr1:103522347:C:T,chr1_100323336_104758690,ENSG00000240038;ENSG00000243480,1,1,0.041167173,-0.264835739867384;0.33816448539235,7.12792180057368e-06;0.00408151618101174,MSBB,eQTL,BM_10,MSBB_eQTL_BM_10


In [84]:
# Keep only rows with a positive cs_coverage_0.95 value
combined_data <- filter(combined_data, cs_coverage_0.95 > 0)

### Aging_new_DNE

In [85]:
# Table size, then the number of distinct variants in aging_DNE
dim(aging_DNE)
length(unique(aging_DNE[["variant_ID"]]))

In [86]:
# Keep only aging_DNE rows whose variant also appears in combined_data
aging_DNE_overlap_xQTL <- inner_join(aging_DNE, combined_data, by = "variant_ID")

In [87]:
# Size of the overlap table and number of distinct overlapping variants
dim(aging_DNE_overlap_xQTL)
length(unique(aging_DNE_overlap_xQTL[["variant_ID"]]))


In [88]:
# Percentage computed from hand-entered counts (152 out of 858)
prop <- (152 / 858) * 100
prop

In [89]:
# Distinct event IDs on the aging_DNE side of the join (suffix .x)
unique(aging_DNE_overlap_xQTL[["event_ID.x"]])

In [90]:
# Save the aging DNE / xQTL overlap table as tab-separated text
fwrite(
  aging_DNE_overlap_xQTL,
  "project/image_QTL/CB_M1/Aging_DNE/aging_DNE_overlap_xQTL.tsv",
  sep = "\t"
)

In [91]:
# Number of overlap rows per event_ID.y value (the combined_data side of the
# join), saved as tab-separated text
gene_aging_DNE_overlap_xQTL <- count(aging_DNE_overlap_xQTL, event_ID.y)
fwrite(
  gene_aging_DNE_overlap_xQTL,
  "project/image_QTL/CB_M1/Aging_DNE/gene_aging_DNE_overlap_xQTL.txt",
  sep = "\t"
)

# Aging_DNE

## Original Data with Gene Name Conversions

| Original event_ID.y | Converted Gene Names | n |
|---------------------|---------------------|---|
| ENSG00000055211;ENSG00000120253;ENSG00000131023 | GINM1;NUP43;LATS1 | 520 |
| ENSG00000100201;ENSG00000100211;ENSG00000100221;ENSG00000100242;ENSG00000184949;ENSG00000100138 | DDX17;CBY1;JOSD1;SUN2;FAM227A;SNU13(AD biomarker) | 13 |
| ENSG00000100201;ENSG00000100211;ENSG00000100221;ENSG00000128268;ENSG00000184949 | DDX17;CBY1;JOSD1;MGAT3(AD biomarker);FAM227A | 10 |
| ENSG00000100242 | SUN2 | 69 |
| ENSG00000108771;ENSG00000037042;ENSG00000108825;ENSG00000159314;ENSG00000263715;ENSG00000120088;ENSG00000185294;ENSG00000120071;ENSG00000176681;ENSG00000185829;ENSG00000228696;ENSG00000238083;ENSG00000262633 | [ncRNA];TUBG2;[ncRNA];[ncRNA];[ncRNA];CRHR1;[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 2 |
| ENSG00000120071;ENSG00000073969;ENSG00000185829;ENSG00000228696;ENSG00000238083 | [ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 173 |
| ENSG00000120071;ENSG00000185829;ENSG00000228696;ENSG00000238083 | [ncRNA];[ncRNA];[ncRNA];[ncRNA] | 140 |
| ENSG00000120088;ENSG00000120071;ENSG00000176681;ENSG00000185829;ENSG00000228696;ENSG00000238083 | CRHR1;[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 186 |
| ENSG00000120253;ENSG00000120256;ENSG00000131023 | [ncRNA];[ncRNA];LATS1 | 157 |
| ENSG00000120253;ENSG00000120265;ENSG00000131023 | [ncRNA];[ncRNA];LATS1 | 72 |
| ENSG00000120253;ENSG00000120265;ENSG00000131023;ENSG00000186625 | [ncRNA];[ncRNA];LATS1;[ncRNA] | 158 |
| ENSG00000120253;ENSG00000131023 | [ncRNA];LATS1 | 108 |
| ENSG00000131013;ENSG00000055211;ENSG00000120253;ENSG00000131023 | [ncRNA];GINM1;[ncRNA];LATS1 | 200 |
| ENSG00000136448;ENSG00000159314;ENSG00000225190;ENSG00000263715;ENSG00000120088;ENSG00000185294;ENSG00000256762;ENSG00000120071;ENSG00000176681;ENSG00000228696;ENSG00000238083 | [ncRNA];[ncRNA];[ncRNA];[ncRNA];CRHR1;[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 36 |
| ENSG00000138439;ENSG00000138442;ENSG00000138380;ENSG00000144426 | [ncRNA];[ncRNA];[ncRNA];[ncRNA] | 339 |
| ENSG00000138439;ENSG00000163596;ENSG00000138380 | [ncRNA];[ncRNA];[ncRNA] | 350 |
| ENSG00000138439;ENSG00000163596;ENSG00000138442;ENSG00000138380 | [ncRNA];[ncRNA];[ncRNA];[ncRNA] | 330 |
| ENSG00000138442;ENSG00000144426 | [ncRNA];[ncRNA] | 72 |
| ENSG00000159314;ENSG00000225190;ENSG00000263715;ENSG00000120088;ENSG00000185294;ENSG00000256762;ENSG00000120071;ENSG00000176681;ENSG00000228696;ENSG00000238083 | [ncRNA];[ncRNA];[ncRNA];CRHR1;[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 88 |
| ENSG00000159314;ENSG00000263715;ENSG00000120088;ENSG00000185294;ENSG00000120071;ENSG00000176681;ENSG00000185829;ENSG00000228696;ENSG00000238083;ENSG00000262633;ENSG00000141232 | [ncRNA];[ncRNA];CRHR1;[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 3 |
| ENSG00000163596;ENSG00000138380 | [ncRNA];[ncRNA] | 368 |
| ENSG00000163596;ENSG00000138380;ENSG00000144426 | [ncRNA];[ncRNA];[ncRNA] | 896 |
| ENSG00000184949 | FAM227A | 132 |
| ENSG00000185829 | [ncRNA] | 531 |
| ENSG00000186868;ENSG00000120071;ENSG00000176681;ENSG00000185829;ENSG00000228696;ENSG00000238083 | [ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 3 |
| ENSG00000228696 | [ncRNA] | 4 |
| [Empty] | [No gene ID] | 91 |

## Summary Statistics

- **Total entries**: 26
- **Total unique ENSG IDs**: 46
- **Successfully converted to gene symbols**: 17 (37%)
- **Non-coding RNAs/Pseudogenes**: 27 (59%)
- **Unknown/Not found**: 2 (4%)
- **Total count (n)**: 5,051 (4,960 excluding the 91 rows with no gene ID)

## Top Events by Count

| Rank | Gene Names | Count (n) | Biological Significance |
|------|------------|-----------|------------------------|
| 1 | [ncRNA];[ncRNA];[ncRNA] | 896 | **Highest**: Non-coding RNA cluster |
| 2 | ENSG00000185829 ([ncRNA]) | 531 | **Single ncRNA** with a high overlap count |
| 3 | GINM1;NUP43;LATS1 | 520 | **Membrane protein + Tumor suppressor** |
| 4 | [ncRNA];[ncRNA] | 368 | Non-coding RNA cluster |
| 5 | [ncRNA];[ncRNA];[ncRNA] | 350 | Non-coding RNA cluster |
| 6 | [ncRNA];[ncRNA];[ncRNA];[ncRNA] | 339 | Non-coding RNA cluster |
| 7 | [ncRNA];[ncRNA];[ncRNA];[ncRNA] | 330 | Non-coding RNA cluster |
| 8 | [ncRNA];GINM1;[ncRNA];LATS1 | 200 | Membrane protein + Tumor suppressor |
| 9 | CRHR1;[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 186 | **Hormone receptor** + ncRNAs |
| 10 | [ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 173 | Non-coding RNA cluster |

## Protein-Coding Genes Analysis

### Key Verified Genes (Total Occurrences):
1. **LATS1** (Large tumor suppressor kinase 1): 520 + 157 + 72 + 158 + 108 + 200 = **1,215 total**
2. **GINM1** (Glycosylated integral membrane protein 1): 520 + 200 = **720 total**
3. **CRHR1** (Corticotropin releasing hormone receptor 1): 2 + 186 + 36 + 88 + 3 = **315 total**
4. **FAM227A** (Family with sequence similarity 227 member A): 13 + 10 + 132 = **155 total**
5. **SUN2** (Sad1 and UNC84 domain containing 2): 13 + 69 = **82 total**

### Functional Categories:

#### **Cell Cycle & Tumor Suppression** (Dominant):
- **LATS1**: Most frequent gene, critical tumor suppressor in Hippo pathway

#### **Membrane Biology**:
- **GINM1**: Glycosylated membrane protein
- **SUN2**: Nuclear envelope protein
- **TMEM258**: Transmembrane protein

#### **Signal Transduction**:
- **CRHR1**: Hormone receptor for stress response
- **CBY1**: Beta-catenin antagonist

#### **RNA Processing**:
- **DDX17**: RNA helicase

#### **Metabolic Enzymes**:
- **MGAT3**: Glycosylation enzyme

## Key Biological Insights

### **1. Tumor Suppressor Dominance**:
**LATS1** appears in 1,215 total occurrences, making it the most significant gene. Often paired with **GINM1**, suggesting a membrane-tumor suppressor axis.

### **2. Non-coding RNA Prominence**:
- **59% of ENSG IDs** are non-coding RNAs
- Highest single count (896) is a non-coding RNA cluster
- Suggests extensive regulatory network activity

### **3. Co-expression Patterns**:
- **LATS1 + GINM1**: Frequent pairing suggests functional relationship
- **CRHR1 + ncRNAs**: Hormone signaling with regulatory RNAs
- **Multi-gene clusters**: DDX17, CBY1 and SNU13 are associated with one variant and are present only in Exc.

### **4. Stress Response Theme**:
**CRHR1** (corticotropin releasing hormone receptor) appears frequently, suggesting stress/hormone response pathways are active.

## Clinical Relevance

- **LATS1** mutations are associated with cancer development
- **CRHR1** is involved in stress response and depression
- **GINM1** may play roles in membrane integrity and cellular adhesion
- High ncRNA content suggests active gene regulation, typical in development or disease states

## Notes

- **[ncRNA]**: Non-coding RNA or genes without official symbols
- **[Unknown]**: ENSG IDs that couldn't be converted (ENSG00000100221, ENSG00000100138)
- **Co-expression clusters**: Multiple genes in single entries suggest coordinated biological processes
- **Regulatory focus**: High proportion of non-coding elements indicates active transcriptional regulation

In [92]:
# Table size of AD_DNE, followed by its number of distinct variant IDs.
dim(AD_DNE)
n_distinct(AD_DNE$variant_ID)

In [93]:
# Keep the AD_DNE rows whose variant_ID also occurs in combined_data. Columns
# shared by both tables (e.g. event_ID) come back suffixed .x (AD_DNE side)
# and .y (combined_data side).
AD_DNE_overlap_xQTL <- inner_join(AD_DNE, combined_data, by = "variant_ID")

In [94]:
# Table size of the overlap, followed by its number of distinct variant IDs.
dim(AD_DNE_overlap_xQTL)
n_distinct(AD_DNE_overlap_xQTL$variant_ID)


In [95]:
# Percentage 1868 / 1952.
# NOTE(review): both numbers are hard-coded; presumably they are the
# distinct-variant counts printed by the cells above (overlap vs. AD_DNE).
# Confirm, and consider deriving them from the data.
prop <- (1868 / 1952) * 100
prop

In [96]:
# Distinct AD_DNE-side event labels (event_ID.x) found in the overlap.
unique(AD_DNE_overlap_xQTL[["event_ID.x"]])

In [97]:
# Save the AD_DNE / xQTL overlap as a tab-separated file.
fwrite(
  AD_DNE_overlap_xQTL,
  file = "project/image_QTL/CB_M1/AD_DNE/AD_DNE_overlap_xQTL.tsv",
  sep = "\t"
)

In [98]:
# Count overlap rows per xQTL-side event_ID.y value (output columns:
# event_ID.y, n) and save the tally as a tab-separated file.
gene_AD_DNE_overlap_xQTL <- count(AD_DNE_overlap_xQTL, event_ID.y)
fwrite(
  gene_AD_DNE_overlap_xQTL,
  file = "project/image_QTL/CB_M1/AD_DNE/gene_AD_DNE_overlap_xQTL.txt",
  sep = "\t"
)

In [208]:
# Load the multi-gene AD_DNE / xQTL overlap table. The "Exc_" modality filter
# is left commented out, so rows are NOT restricted to Exc_ modalities here.
Exc <- fread("~/project/OUTPUT/AD_DNE_overlap_xQTL_multigene.tsv") # |> filter(str_detect(Modality, "Exc_"))

In [209]:
# Report the row and column counts of Exc.
dim(Exc)

In [210]:
# Rows whose event_ID.y mentions ENSG00000131023 (the ID that lines up with
# LATS1 in the AD_DNE gene-name table below).
exc_last <- filter(Exc, str_detect(event_ID.y, "ENSG00000131023"))

In [211]:
# Number of ENSG00000131023 rows per Modality.
count(exc_last, Modality)

Modality,n
<chr>,<int>
AC,119
Ast_mega,200
Exc_DeJager,94
Exc_mega,64
Inh_DeJager,157
Inh_mega,72
OPC_DeJager,201
OPC_mega,120
Oli_DeJager,108
Oli_mega,80


### AD_DNE

| `event_ID.y`                                                                                                                                                                                                    | `n` | `gene_name`                                                                                                 | `event_IDs`                                             |
| --------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | --- | ----------------------------------------------------------------------------------------------------------- | ------------------------------------------------------- |
| ENSG00000055211;ENSG00000120253;ENSG00000131023                                                                                                                                                                 | 520 | GINM1;NUP43;LATS1                                                                                           | image\_Aging2                                           |
| ENSG00000100201;ENSG00000100211;ENSG00000100221;ENSG00000100242;ENSG00000184949;ENSG00000100138                                                                                                                 | 13  | DDX17;CBY1;JOSD1;SUN2;FAM227A;NA                                                                            | image\_Aging3; AD\_Wightman\_Full\_2021                 |
| ENSG00000100201;ENSG00000100211;ENSG00000100221;ENSG00000128268;ENSG00000184949                                                                                                                                 | 10  | DDX17;CBY1;JOSD1;NA;FAM227A                                                                                 | image\_Aging3; AD\_Wightman\_Full\_2021                 |
| ENSG00000100242                                                                                                                                                                                                 | 69  | SUN2                                                                                                        | image\_Aging3; AD\_Wightman\_Full\_2021                 |
| ENSG00000108771;ENSG00000037042;ENSG00000108825;ENSG00000159314;ENSG00000263715;ENSG00000120088;ENSG00000185294;ENSG00000120071;ENSG00000176681;ENSG00000185829;ENSG00000228696;ENSG00000238083;ENSG00000262633 | 2   | NA;NA;PTGES3L-AARSD1;ARHGAP27;LINC02210-CRHR1;CRHR1;SPPL2C;KANSL1;LRRC37A;ARL17A;ARL17B;LRRC37A2;AC005670.2 | image\_AD1                                              |
| ENSG00000120071;ENSG00000073969;ENSG00000185829;ENSG00000228696;ENSG00000238083                                                                                                                                 | 173 | KANSL1;NSF;ARL17A;ARL17B;LRRC37A2                                                                           | image\_AD1; image\_Aging5; AD\_Wightman\_Full\_2021     |
| ENSG00000120071;ENSG00000185829;ENSG00000228696;ENSG00000238083                                                                                                                                                 | 140 | KANSL1;ARL17A;ARL17B;LRRC37A2                                                                               | image\_AD1; image\_Aging5; AD\_Wightman\_Full\_2021     |
| ENSG00000120088;ENSG00000120071;ENSG00000176681;ENSG00000185829;ENSG00000228696;ENSG00000238083                                                                                                                 | 186 | CRHR1;KANSL1;LRRC37A;ARL17A;ARL17B;LRRC37A2                                                                 | image\_AD1; image\_Aging5; AD\_Wightman\_Full\_2021     |
| ENSG00000120253;ENSG00000120256;ENSG00000131023                                                                                                                                                                 | 157 | NUP43;LRP11;LATS1                                                                                           | image\_Aging2                                           |
| ENSG00000120253;ENSG00000120265;ENSG00000131023                                                                                                                                                                 | 72  | NUP43;PCMT1;LATS1                                                                                           | image\_Aging2                                           |
| ENSG00000120253;ENSG00000120265;ENSG00000131023;ENSG00000186625                                                                                                                                                 | 158 | NUP43;PCMT1;LATS1;KATNA1                                                                                    | image\_Aging2                                           |
| ENSG00000120253;ENSG00000131023                                                                                                                                                                                 | 108 | NUP43;LATS1                                                                                                 | image\_Aging2                                           |
| ENSG00000131013;ENSG00000055211;ENSG00000120253;ENSG00000131023                                                                                                                                                 | 200 | PPIL4;GINM1;NUP43;LATS1                                                                                     | image\_Aging2                                           |
| ENSG00000136448;ENSG00000159314;ENSG00000225190;ENSG00000263715;ENSG00000120088;ENSG00000185294;ENSG00000256762;ENSG00000120071;ENSG00000176681;ENSG00000228696;ENSG00000238083                                 | 36  | NA;ARHGAP27;PLEKHM1;LINC02210-CRHR1;CRHR1;SPPL2C;STH;KANSL1;LRRC37A;ARL17B;LRRC37A2                         | image\_AD1; image\_Aging5; AD\_Wightman\_Full\_2021     |
| ENSG00000138439;ENSG00000138442;ENSG00000138380;ENSG00000144426                                                                                                                                                 | 339 | ICA1L;WDR12;CARF;NBEAL1                                                                                     | image\_Aging5; AD\_Bellenguez; AD\_Wightman\_Full\_2021 |
| ENSG00000138439;ENSG00000163596;ENSG00000138380                                                                                                                                                                 | 350 | ICA1L;ICA1L;CARF                                                                                            | image\_Aging5; AD\_Bellenguez; AD\_Wightman\_Full\_2021 |
| ENSG00000138439;ENSG00000163596;ENSG00000138442;ENSG00000138380                                                                                                                                                 | 330 | ICA1L;ICA1L;WDR12;CARF                                                                                      | image\_Aging5; AD\_Bellenguez; AD\_Wightman\_Full\_2021 |
| ENSG00000138442;ENSG00000144426                                                                                                                                                                                 | 72  | WDR12;NBEAL1                                                                                                | image\_Aging5; AD\_Bellenguez; AD\_Wightman\_Full\_2021 |
| ENSG00000159314;ENSG00000225190;ENSG00000263715;ENSG00000120088;ENSG00000185294;ENSG00000256762;ENSG00000120071;ENSG00000176681;ENSG00000228696;ENSG00000238083                                                 | 88  | ARHGAP27;PLEKHM1;LINC02210-CRHR1;CRHR1;SPPL2C;STH;KANSL1;LRRC37A;ARL17B;LRRC37A2                            | image\_AD1; image\_Aging5; AD\_Wightman\_Full\_2021     |
| ENSG00000159314;ENSG00000263715;ENSG00000120088;ENSG00000185294;ENSG00000120071;ENSG00000176681;ENSG00000185829;ENSG00000228696;ENSG00000238083;ENSG00000262633;ENSG00000141232                                 | 3   | ARHGAP27;LINC02210-CRHR1;CRHR1;SPPL2C;KANSL1;LRRC37A;ARL17A;ARL17B;LRRC37A2;AC005670.2;NA                   | image\_AD1; image\_Aging5; AD\_Wightman\_Full\_2021     |
| ENSG00000163596;ENSG00000138380                                                                                                                                                                                 | 368 | ICA1L;CARF                                                                                                  | image\_Aging5; AD\_Bellenguez; AD\_Wightman\_Full\_2021 |
| ENSG00000163596;ENSG00000138380;ENSG00000144426                                                                                                                                                                 | 896 | ICA1L;CARF;NBEAL1                                                                                           | image\_Aging5; AD\_Bellenguez; AD\_Wightman\_Full\_2021 |
| ENSG00000184949                                                                                                                                                                                                 | 132 | FAM227A                                                                                                     | image\_Aging3; AD\_Wightman\_Full\_2021                 |
| ENSG00000185829                                                                                                                                                                                                 | 531 | ARL17A                                                                                                      | image\_AD1; image\_Aging5; AD\_Wightman\_Full\_2021     |
| ENSG00000186868;ENSG00000120071;ENSG00000176681;ENSG00000185829;ENSG00000228696;ENSG00000238083                                                                                                                 | 3   | MAPT;KANSL1;LRRC37A;ARL17A;ARL17B;LRRC37A2                                                                  | image\_AD1; image\_Aging5; AD\_Wightman\_Full\_2021     |
| ENSG00000228696                                                                                                                                                                                                 | 4   | ARL17B                                                                                                      | image\_AD1; image\_Aging5; AD\_Wightman\_Full\_2021     |
|                                                                                                                                                                                                                 | 91  |                                                                                                             |                                                         |


### 🧠 Gene Functional Summary

| Gene                  | Full Name                                    | Function                                                                                                                              | Relevance to Aging / AD                                                                                                                                                |
| --------------------- | -------------------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------- |
| **CRHR1**             | *Corticotropin-Releasing Hormone Receptor 1* | G-protein-coupled receptor involved in the stress response via the HPA axis.                                                          | Stress-related signaling is implicated in cognitive decline, depression, and AD. Genetic variants are associated with hippocampal volume and memory.                   |
| **KANSL1**            | *KAT8 Regulatory NSL Complex Subunit 1*      | Part of the NSL complex involved in histone H4 acetylation and chromatin remodeling.                                                  | Strongly implicated in 17q21.31 inversion haplotype. Linked to tau pathology and neurodevelopment. Deletions cause Koolen-de Vries syndrome (with cognitive deficits). |
| **LRRC37A / LRRC37B** | *Leucine Rich Repeat Containing 37A/B*       | Poorly characterized; located near MAPT and KANSL1 on 17q21.31. May play a role in cytoskeletal organization and vesicle trafficking. | Frequently co-inherited with MAPT haplotypes. Variants are associated with risk loci for AD, PD, and schizophrenia. Possibly modulate MAPT expression.                 |
| **NMT1**              | *N-myristoyltransferase 1*                   | Catalyzes protein myristoylation—a lipid modification important for membrane localization and signal transduction.                    | Emerging role in neurodegeneration. Myristoylation affects proteins involved in AD, including BACE1 and Tau localization. Potential therapeutic target.                |
### MULTI-GENE CLUSTER
- **LATS1, NUP43, GINM1, PPIL4, PCMT1, KATNA1** are associated with the same variants. The first three genes are available in all contexts, KATNA1 is present only in Exc, and PCMT1 is present in either Exc or Inh.
---

### 🔗 Shared Features

* **All four genes are physically or functionally linked to 17q21.31**, a known hotspot in Alzheimer's genetics.
* **Regulation of gene expression, signal transduction, and vesicle/membrane dynamics** appear to be shared pathways.
* **Stress response (CRHR1)** and **chromatin structure (KANSL1)** intersect with neuronal plasticity and neuroinflammation.
* **NMT1’s role in protein modification** may indirectly regulate the localization and function of proteins like tau or amyloid precursors.



### PD_DNE

In [193]:
# Table size of PD_DNE, followed by its number of distinct variant IDs.
dim(PD_DNE)
n_distinct(PD_DNE$variant_ID)

In [194]:
# Keep the PD_DNE rows whose variant_ID also occurs in combined_data. Columns
# shared by both tables (e.g. event_ID) come back suffixed .x (PD_DNE side)
# and .y (combined_data side).
PD_DNE_overlap_xQTL <- inner_join(PD_DNE, combined_data, by = "variant_ID")

In [195]:
# Table size of the overlap, followed by its number of distinct variant IDs.
dim(PD_DNE_overlap_xQTL)
n_distinct(PD_DNE_overlap_xQTL$variant_ID)


In [196]:
# Percentage 150 / 152.
# NOTE(review): both numbers are hard-coded; presumably they are the
# distinct-variant counts printed by the cells above (overlap vs. PD_DNE).
# Confirm, and consider deriving them from the data.
prop <- (150 / 152) * 100
prop

In [197]:
# Distinct PD_DNE-side event labels (event_ID.x) found in the overlap.
unique(PD_DNE_overlap_xQTL[["event_ID.x"]])

In [198]:
# Save the PD_DNE / xQTL overlap as a tab-separated file in the working
# directory.
fwrite(PD_DNE_overlap_xQTL, file = "PD_DNE_overlap_xQTL.tsv", sep = "\t")

In [199]:
# Count overlap rows per xQTL-side event_ID.y value (output columns:
# event_ID.y, n) and save the tally as a tab-separated file.
gene_PD_DNE_overlap_xQTL <- count(PD_DNE_overlap_xQTL, event_ID.y)
fwrite(
  gene_PD_DNE_overlap_xQTL,
  file = "gene_PD_DNE_overlap_xQTL.txt",
  sep = "\t"
)

# Non-coding RNA Cluster ENSG ID Conversion

## Individual ENSG ID Conversions

| ENSG ID | Gene Symbol | Gene Name | Status |
|---------|-------------|-----------|---------|
| ENSG00000037042 | TUBG2 | Tubulin gamma 2 | ✓ Verified |
| ENSG00000073969 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000108771 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000108825 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000120071 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000120088 | CRHR1 | Corticotropin releasing hormone receptor 1 | ✓ Verified |
| ENSG00000136448 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000141232 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000159314 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000176681 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000185294 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000185829 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000186868 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000225190 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000228696 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000238083 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000256762 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000262633 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |
| ENSG00000263715 | [ncRNA] | [Likely non-coding RNA] | ❓ No gene symbol |

## Original Data with Gene Name Conversions

| Original event_ID.y | Converted Gene Names | n |
|---------------------|---------------------|---|
| ENSG00000108771;ENSG00000037042;ENSG00000108825;ENSG00000159314;ENSG00000263715;ENSG00000120088;ENSG00000185294;ENSG00000120071;ENSG00000176681;ENSG00000185829;ENSG00000228696;ENSG00000238083;ENSG00000262633 | [ncRNA];TUBG2;[ncRNA];[ncRNA];[ncRNA];CRHR1;[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 2 |
| ENSG00000120071;ENSG00000073969;ENSG00000185829;ENSG00000228696;ENSG00000238083 | [ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 173 |
| ENSG00000120071;ENSG00000185829;ENSG00000228696;ENSG00000238083 | [ncRNA];[ncRNA];[ncRNA];[ncRNA] | 140 |
| ENSG00000120088;ENSG00000120071;ENSG00000176681;ENSG00000185829;ENSG00000228696;ENSG00000238083 | CRHR1;[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 186 |
| ENSG00000136448;ENSG00000159314;ENSG00000225190;ENSG00000263715;ENSG00000120088;ENSG00000185294;ENSG00000256762;ENSG00000120071;ENSG00000176681;ENSG00000228696;ENSG00000238083 | [ncRNA];[ncRNA];[ncRNA];[ncRNA];CRHR1;[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 36 |
| ENSG00000159314;ENSG00000225190;ENSG00000263715;ENSG00000120088;ENSG00000185294;ENSG00000256762;ENSG00000120071;ENSG00000176681;ENSG00000228696;ENSG00000238083 | [ncRNA];[ncRNA];[ncRNA];CRHR1;[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 88 |
| ENSG00000159314;ENSG00000263715;ENSG00000120088;ENSG00000185294;ENSG00000120071;ENSG00000176681;ENSG00000185829;ENSG00000228696;ENSG00000238083;ENSG00000262633;ENSG00000141232 | [ncRNA];[ncRNA];CRHR1;[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 3 |
| ENSG00000185829 | [ncRNA] | 531 |
| ENSG00000186868;ENSG00000120071;ENSG00000176681;ENSG00000185829;ENSG00000228696;ENSG00000238083 | [ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA];[ncRNA] | 3 |
| ENSG00000228696 | [ncRNA] | 4 |
| [Empty] | [No gene ID] | 91 |

## Simplified Gene Symbol Table

| Gene Symbols | Count (n) |
|-------------|-----------|
| [ncRNA];TUBG2;[ncRNA];[ncRNA];[ncRNA];CRHR1;[Multiple ncRNAs] | 2 |
| [Multiple ncRNAs] | 173 |
| [Multiple ncRNAs] | 140 |
| CRHR1;[Multiple ncRNAs] | 186 |
| [ncRNA];[ncRNA];[ncRNA];[ncRNA];CRHR1;[Multiple ncRNAs] | 36 |
| [ncRNA];[ncRNA];[ncRNA];CRHR1;[Multiple ncRNAs] | 88 |
| [ncRNA];[ncRNA];CRHR1;[Multiple ncRNAs] | 3 |
| [Single ncRNA] | 531 |
| [Multiple ncRNAs] | 3 |
| [Single ncRNA] | 4 |
| [No genes] | 91 |

## Summary Statistics

- **Total entries**: 11
- **Total unique ENSG IDs**: 19
- **Protein-coding genes**: 2 (11%)
  - TUBG2 (Tubulin gamma 2)
  - CRHR1 (Corticotropin releasing hormone receptor 1)
- **Non-coding RNAs**: 17 (89%)
- **Total count (n)**: 1,257

## Key Gene Analysis

### **Protein-Coding Genes (Only 2 identified):**

#### **CRHR1 (Corticotropin Releasing Hormone Receptor 1):**
- **Total occurrences**: 2 + 186 + 36 + 88 + 3 = **315**
- **Function**: Stress hormone receptor, involved in HPA axis
- **Clinical relevance**: Depression, anxiety, stress disorders
- **Pattern**: Always appears with multiple ncRNAs

#### **TUBG2 (Tubulin Gamma 2):**
- **Total occurrences**: 2
- **Function**: Cytoskeletal protein, microtubule organization
- **Pattern**: Appears only once in large cluster

### **Non-coding RNA Dominant Patterns:**

#### **ENSG00000185829 (Single ncRNA with highest expression):**
- **Count**: 531 (42% of total dataset)
- **Pattern**: Appears both alone and in clusters
- **Significance**: Major regulatory RNA

#### **ENSG00000228696 (Another significant ncRNA):**
- **Individual count**: 4
- **Cluster appearances**: Multiple entries
- **Pattern**: Frequently co-expressed with other ncRNAs

## Functional Analysis

### **1. Regulatory RNA Network (89% of genes):**
This dataset is **dominated by non-coding RNAs**, suggesting:
- Active transcriptional regulation
- Developmental processes
- Disease states with altered gene expression
- Stress response regulation

### **2. Stress Response Theme:**
**CRHR1** (stress hormone receptor) appears in 5 out of 10 gene-containing entries, indicating:
- HPA (Hypothalamic-Pituitary-Adrenal) axis activation
- Stress/cortisol signaling pathways
- Potential depression or anxiety-related states

### **3. Co-expression Clusters:**
- **Large clusters**: Up to 13 genes per entry
- **CRHR1 + ncRNAs**: Consistent pattern suggesting coordinated stress response
- **ncRNA-only clusters**: Pure regulatory networks

## Biological Significance

### **Stress Response + Gene Regulation:**
The combination of:
- **CRHR1** (stress hormone signaling)
- **Multiple ncRNAs** (gene regulation)
- **TUBG2** (cytoskeletal changes)

Suggests this dataset may represent:
- **Stress-induced transcriptional changes**
- **Depression/anxiety molecular signatures**
- **Developmental stress responses**
- **Disease states with altered gene regulation**

### **Clinical Implications:**
- **CRHR1** is a target for depression treatments
- High ncRNA content suggests **epigenetic regulation**
- **Coordinated expression** indicates **pathway-level changes**

## Key Patterns

1. **CRHR1** consistently clusters with ncRNAs (stress + regulation)
2. **ARL17A** (ENSG00000185829) has the highest single-entry count (n = 531); `n` is a row count from `count(event_ID.y)`, not an expression level. Predicted to enable GTP binding activity. Predicted to be involved in intracellular protein transport and vesicle-mediated transport.
3. **89% non-coding genes** = major regulatory focus
4. **Multi-gene clusters** suggest coordinated biological networks
5. **Empty entries** (91 counts) may represent background or controls

## Notes

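- **ARL17A** (ENSG00000185829): shown as `[ncRNA]` in the conversion tables above because no gene symbol was retrieved for it there; the annotated AD_DNE table maps this ID to ARL17A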
- **Cluster analysis**: Most entries contain 4-13 genes suggesting co-regulation
- **Stress focus**: CRHR1 prominence indicates stress/hormone pathways
- **Regulatory dominance**: 89% ncRNAs suggests active transcriptional control

In [None]:
## Annotate the gene list positions based on our v103 genomic reference
# Runs the annotate_coord workflow of gene_annotation.ipynb on
# gene_PD_DNE_overlap_xQTL.txt with OUTPUT as the working directory.
# NOTE(review): --phenotype-id-column is gene_id, but the
# gene_PD_DNE_overlap_xQTL.txt written by the count(event_ID.y) cell has the
# columns event_ID.y and n. Confirm the file passed here really has a gene_id
# column (it may have been edited or regenerated elsewhere).
!sos run xqtl-protocol/code/data_preprocessing/phenotype/gene_annotation.ipynb annotate_coord \
    --cwd OUTPUT \
    --phenoFile gene_PD_DNE_overlap_xQTL.txt \
    --phenotype-id-column gene_id \
    --molecular-trait-type gene \
    --coordinate-annotation data/resource/references/Homo_sapiens.GRCh38.103.chr.reformatted.collapse_only.gene.ERCC.gtf

In [101]:
# Load the annotated gene list (tab-separated) from ~/OUTPUT.
annotated_PD_dne_gene <- fread(
  "~/OUTPUT/gene_PD_DNE_overlap_xQTL.gene_list.tsv",
  sep = "\t"
)

In [102]:
# Unique (event_ID.x, gene_ID) pairs from the PD_DNE overlap; used below to
# attach the PD_DNE-side event label to each annotated gene.
coloc_outcome <- PD_DNE_overlap_xQTL |>
  select(event_ID.x, gene_ID) |>
  unique()

In [103]:
# Attach event_ID.x to the annotated genes by matching the annotation's
# gene_id against coloc_outcome's gene_ID; genes without a match are dropped.
annotated_PD_dne_gene <- inner_join(
  annotated_PD_dne_gene,
  coloc_outcome,
  by = c("gene_id" = "gene_ID")
)

In [104]:
# Drop the join suffix: event_ID.x becomes event_ID.
annotated_PD_dne_gene <- rename(annotated_PD_dne_gene, event_ID = event_ID.x)

In [105]:
# Save the annotated gene table as a tab-separated file in the working
# directory.
fwrite(annotated_PD_dne_gene, file = "annotated_PD_dne_gene.tsv", sep = "\t")

## single finemapping

In [99]:
# Restrict the file manifest to single-context fine-mapping results.
meta_file <- filter(meta_file, Method == "single_context_finemapping")

In [100]:
# Keep only rows with a positive cs_coverage_0.95 value.
# NOTE(review): the following cell rebuilds combined_data from meta_file with
# pmap_dfr(), which discards this filter (its preview still shows rows with
# cs_coverage_0.95 == 0). Confirm the intended cell order.
combined_data <- filter(combined_data, cs_coverage_0.95 > 0)

In [101]:
# Assuming your meta_file dataframe is already loaded
# If not, you'd need to recreate it from your data

# Read one tab-separated results file with read_tsv() and tag every row with
# the file's origin.
#
# Arguments:
#   path      - file to read (handed straight to read_tsv()).
#   cohort    - value stored in the new `Cohort` column.
#   data_type - value stored in the new `Data_Type` column.
#   modality  - value stored in the new `Modality` column.
#
# Returns the table that was read plus four added columns: Cohort, Data_Type,
# Modality, and Cohort_Data_Type_Modality (the three values joined by "_").
read_topolci_file <- function(path, cohort, data_type, modality) {
  tbl <- read_tsv(path, show_col_types = FALSE)

  # New columns, in the order they are appended.
  tags <- list(
    Cohort = cohort,
    Data_Type = data_type,
    Modality = modality,
    Cohort_Data_Type_Modality = paste(cohort, data_type, modality, sep = "_")
  )
  for (tag_name in names(tags)) {
    tbl[[tag_name]] <- tags[[tag_name]]
  }

  tbl
}

# Read every file listed in meta_file (one read_topolci_file() call per row,
# fed that row's Path, Cohort, Data_Type and Modality) and row-bind the
# results into a single table.
combined_data <- with(
  meta_file,
  pmap_dfr(
    list(
      path = Path,
      cohort = Cohort,
      data_type = Data_Type,
      modality = Modality
    ),
    read_topolci_file
  )
)

# Optional: persist the combined table.
#fwrite(combined_data, "combined_top_loci_data.tsv")

# Show the first rows.
head(combined_data)

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality,cluster,cat,ID
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,903284,903285,C,T,chr1:903285:T:C,ENSG00000235169,Knight_eQTL_brain_ENSG00000235169,0,0,0,0.04257485,-0.8420827,KNIGHT,eQTL,Brain,KNIGHT_eQTL_Brain,,,
1,903351,903352,A,G,chr1:903352:G:A,ENSG00000235169,Knight_eQTL_brain_ENSG00000235169,0,0,0,0.04257485,-0.8420827,KNIGHT,eQTL,Brain,KNIGHT_eQTL_Brain,,,
1,903509,903510,G,A,chr1:903510:A:G,ENSG00000235169,Knight_eQTL_brain_ENSG00000235169,0,0,0,0.04257485,-0.8420827,KNIGHT,eQTL,Brain,KNIGHT_eQTL_Brain,,,
1,903535,903536,T,A,chr1:903536:A:T,ENSG00000235169,Knight_eQTL_brain_ENSG00000235169,0,0,0,0.04257485,-0.8420827,KNIGHT,eQTL,Brain,KNIGHT_eQTL_Brain,,,
1,904080,904081,C,T,chr1:904081:T:C,ENSG00000235169,Knight_eQTL_brain_ENSG00000235169,0,0,0,0.04257485,-0.8420827,KNIGHT,eQTL,Brain,KNIGHT_eQTL_Brain,,,
1,904114,904115,T,G,chr1:904115:G:T,ENSG00000235169,Knight_eQTL_brain_ENSG00000235169,0,0,0,0.04257485,-0.8420827,KNIGHT,eQTL,Brain,KNIGHT_eQTL_Brain,,,


In [107]:
# Variants for ENSG00000100201 with a positive cs_coverage_0.95 value in
# DLPFC events, highest PIP first.
combined_data |>
  filter(
    gene_ID == "ENSG00000100201",
    cs_coverage_0.95 > 0,
    str_detect(event_ID, "DLPFC")
  ) |>
  arrange(desc(PIP))

#chr,start,end,a1,a2,variant_ID,gene_ID,event_ID,cs_coverage_0.95,cs_coverage_0.7,cs_coverage_0.5,PIP,conditional_effect,Cohort,Data_Type,Modality,Cohort_Data_Type_Modality,cluster,cat,ID
<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
22,37557663,37557664,C,G,chr22:37557664:G:C,ENSG00000100201,ROSMAP_DLPFC_sQTL_chr22:38486440:38488016:clu_173569_-:PR:ENSG00000100201,1,1,1,0.9966823,0.4723593,ROSMAP,sQTL,DLPFC,ROSMAP_sQTL_DLPFC,clu_173569_,PR,chr22:38486440:38488016:clu_173569_-:PR
22,40193866,40193867,GAG,AAG,chr22:40193867:AAG:GAG,ENSG00000100201,DLPFC_DeJager_eQTL_ENSG00000100201,2,2,2,0.9966031,0.773087,ROSMAP,eQTL,DLPFC,ROSMAP_eQTL_DLPFC,,,
22,38192616,38192617,A,G,chr22:38192617:G:A,ENSG00000100201,ROSMAP_DLPFC_sQTL_chr22:38490436:38492055:clu_173570_-:UP:ENSG00000100201,1,1,1,0.9604486,0.8721438,ROSMAP,sQTL,DLPFC,ROSMAP_sQTL_DLPFC,clu_173570_,UP,chr22:38490436:38492055:clu_173570_-:UP


## ColocBoost

In [29]:
# Load the filtered ColocBoost export (.bed file).
all_cb <- fread(
  "~/project/image_QTL/CB_M1/Image_AD_aging_PD_meta_colocboost_export_filtered_ucos.bed"
)

In [30]:
# Peek at the first rows of the ColocBoost export, then report its dimensions.
head(all_cb)
dim(all_cb)

#chr,start,end,a1,a2,variant_ID,region_ID,event_ID,cos_ID,vcp,cos_npc,min_npc_outcome,coef,z
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<chr>,<chr>
chr1,161189357,161189357,T,C,chr1:161189357:C:T,chr1_158933969_162352348,AD_Bellenguez; AD_Wightman_Excluding23andMe_2021,chr1_158933969_162352348:cos1:y7_y10,0.148633848,1,1,-0.00237822429413167;-0.00126943889431771,-5.66666666666667;-5.39920492844837
chr1,161215268,161215268,T,G,chr1:161215268:G:T,chr1_158933969_162352348,AD_Bellenguez; AD_Wightman_Excluding23andMe_2021,chr1_158933969_162352348:cos1:y7_y10,0.213683842,1,1,-0.00314917325108774;-0.0015555119117074,-5.72826086956522;-5.44139840072206
chr1,161217875,161217875,T,C,chr1:161217875:C:T,chr1_158933969_162352348,AD_Bellenguez; AD_Wightman_Excluding23andMe_2021,chr1_158933969_162352348:cos1:y7_y10,0.630037093,1,1,-0.0035129752975932;-0.00589550094118934,-5.71276595744681;-5.78719041195195
chr1,200905600,200905600,G,A,chr1:200905600:A:G,chr1_200496446_202852808,AD_Wightman_Full_2021; AD_Wightman_Excluding23andMe_2021,chr1_200496446_202852808:cos1:y10_y11,0.007671881,1,1,9.234320806912e-05;0.000139149039132946,4.53950141738719;4.29121674501588
chr1,200905967,200905967,C,T,chr1:200905967:T:C,chr1_200496446_202852808,AD_Wightman_Full_2021; AD_Wightman_Excluding23andMe_2021,chr1_200496446_202852808:cos1:y10_y11,0.008368034,1,1,0.000104268954830228;0.000138364785535695,4.58025882916672;4.28441576043348
chr1,200906114,200906114,A,T,chr1:200906114:T:A,chr1_200496446_202852808,AD_Wightman_Full_2021; AD_Wightman_Excluding23andMe_2021,chr1_200496446_202852808:cos1:y10_y11,0.008713429,1,1,0.000107208760913441;0.000142027094052532,4.58984897079625;4.29387534466063


In [31]:
# Subset the ColocBoost table to rows whose event_ID contains "image_" together
# with a given partner-trait substring. event_ID is a "; "-separated list of
# trait names, so one row can match more than one partner pattern.
TL1_DNE <- all_cb |>
  filter(str_detect(event_ID, "image_") & str_detect(event_ID, "Telomere_pc1"))
mvAge_DNE <- all_cb |>
  filter(str_detect(event_ID, "image_") & str_detect(event_ID, "mvage"))
# NOTE(review): "PD" is matched without a trailing underscore (unlike "AD_");
# confirm no other trait name contains "PD".
PD_DNE <- all_cb |>
  filter(str_detect(event_ID, "image_") & str_detect(event_ID, "PD"))
# Aging set = rows matching Telomere_pc1 OR mvage. Selecting with one filter
# (instead of rbind(TL1_DNE, mvAge_DNE)) keeps a row that names both traits from
# being included twice, which would inflate downstream row and gene counts.
# Rows stay in the order they have in all_cb.
aging_DNE <- all_cb |>
  filter(str_detect(event_ID, "image_") & str_detect(event_ID, "Telomere_pc1|mvage"))
AD_DNE <- all_cb |>
  filter(str_detect(event_ID, "image_") & str_detect(event_ID, "AD_"))
# All image-trait rows, regardless of partner trait.
DNE <- all_cb |>
  filter(str_detect(event_ID, "image_"))

### Aging_new_DNE

In [32]:
# Table dimensions and number of distinct variants in the aging subset.
aging_DNE |> dim()
aging_DNE[["variant_ID"]] |> unique() |> length()

In [33]:
# Keep aging ColocBoost rows whose variant also appears in combined_data.
# variant_ID is not a unique key on either side here, so the row fan-out is
# declared explicitly; this avoids dplyr's many-to-many warning and documents
# the intent. Other columns present in both tables get .x (ColocBoost) and
# .y (combined_data) suffixes.
aging_DNE_overlap_xQTL <- aging_DNE |>
  inner_join(combined_data, by = "variant_ID", relationship = "many-to-many")

In [34]:
# Dimensions of the joined table and number of distinct overlapping variants.
aging_DNE_overlap_xQTL |> dim()
aging_DNE_overlap_xQTL[["variant_ID"]] |> unique() |> length()


In [35]:
# Percentage 282 out of 858.
# NOTE(review): both counts are hard-coded, presumably copied from the outputs
# of the preceding cells; re-check them whenever the inputs change.
prop <- 282 / 858 * 100
prop

In [36]:
# Distinct ColocBoost-side event IDs (event_ID.x) among the overlapping rows.
aging_DNE_overlap_xQTL[["event_ID.x"]] |> unique()

In [37]:
# Save the aging overlap table as tab-separated text in the working directory.
fwrite(
  x = aging_DNE_overlap_xQTL,
  file = "aging_new_DNE_overlap_xQTL.tsv",
  sep = "\t"
)

In [38]:
# Number of overlap rows per gene_ID (columns: gene_ID, n), written as a
# tab-separated gene list in the working directory.
gene_aging_DNE_overlap_xQTL <- count(aging_DNE_overlap_xQTL, gene_ID)
fwrite(
  x = gene_aging_DNE_overlap_xQTL,
  file = "gene_aging_new_DNE_overlap_xQTL.txt",
  sep = "\t"
)

In [None]:
## Annotate the gene list positions based on our v103 genomic reference
# Runs the `annotate_coord` workflow of gene_annotation.ipynb through `sos run`,
# with the aging gene-count table as --phenoFile and the GRCh38.103 GTF as the
# coordinate annotation; --cwd points at the Aging_DNE project folder.
# NOTE(review): the gene-count table is built with count(gene_ID), so its ID
# column is named `gene_ID`, while --phenotype-id-column below is `gene_id`;
# confirm the workflow resolves this.
# NOTE(review): the leading `!` is IPython shell-escape syntax; presumably this
# cell is run from a shell/SoS context rather than the R kernel. Confirm.
!sos run xqtl-protocol/code/data_preprocessing/phenotype/gene_annotation.ipynb annotate_coord \
    --cwd project/image_QTL/CB_M1/Aging_DNE \
    --phenoFile gene_aging_new_DNE_overlap_xQTL.txt \
    --phenotype-id-column gene_id \
    --molecular-trait-type gene \
    --coordinate-annotation data/resource/references/Homo_sapiens.GRCh38.103.chr.reformatted.collapse_only.gene.ERCC.gtf

In [39]:
# Read the tab-separated gene list from the Aging_DNE annotation folder.
annotated_aging_dne_gene <- fread(
  "~/project/image_QTL/CB_M1/Aging_DNE/gene_aging_new_DNE_overlap_xQTL.gene_list.tsv",
  sep = "\t"
)

In [40]:
# Distinct (ColocBoost event, gene) pairs from the aging overlap table.
coloc_outcome <- aging_DNE_overlap_xQTL |>
  select(event_ID.x, gene_ID) |>
  unique()

In [41]:
# Attach the ColocBoost event(s) to each annotated gene; the annotated table
# keys on `gene_id`, the pair table on `gene_ID`.
annotated_aging_dne_gene <- inner_join(
  annotated_aging_dne_gene,
  coloc_outcome,
  by = c("gene_id" = "gene_ID")
)

In [42]:
# Drop the join suffix: event_ID.x becomes event_ID.
annotated_aging_dne_gene <- rename(
  annotated_aging_dne_gene,
  event_ID = event_ID.x
)

In [43]:
# Write the annotated aging gene table (tab-separated); the path is relative to
# the current working directory.
fwrite(
  x = annotated_aging_dne_gene,
  file = "project/image_QTL/CB_M1/Aging_DNE/annotated_aging_new_dne_gene.tsv",
  sep = "\t"
)

### AD_DNE

In [44]:
# Table dimensions and number of distinct variants in the AD subset.
AD_DNE |> dim()
AD_DNE[["variant_ID"]] |> unique() |> length()

In [45]:
# Keep AD ColocBoost rows whose variant also appears in combined_data.
# variant_ID is not a unique key on either side here, so the row fan-out is
# declared explicitly; this avoids dplyr's many-to-many warning and documents
# the intent. Other columns present in both tables get .x (ColocBoost) and
# .y (combined_data) suffixes.
AD_DNE_overlap_xQTL <- AD_DNE |>
  inner_join(combined_data, by = "variant_ID", relationship = "many-to-many")

In [46]:
# Dimensions of the joined table and number of distinct overlapping variants.
AD_DNE_overlap_xQTL |> dim()
AD_DNE_overlap_xQTL[["variant_ID"]] |> unique() |> length()


In [47]:
# Percentage 1949 out of 1952.
# NOTE(review): both counts are hard-coded, presumably copied from the outputs
# of the preceding cells; re-check them whenever the inputs change.
prop <- 1949 / 1952 * 100
prop

In [48]:
# Distinct ColocBoost-side event IDs (event_ID.x) among the overlapping rows.
AD_DNE_overlap_xQTL[["event_ID.x"]] |> unique()

In [49]:
# Save the AD overlap table as tab-separated text in the working directory.
fwrite(
  x = AD_DNE_overlap_xQTL,
  file = "AD_DNE_overlap_xQTL.tsv",
  sep = "\t"
)

In [50]:
# Number of overlap rows per gene_ID (columns: gene_ID, n), written as a
# tab-separated gene list in the working directory.
gene_AD_DNE_overlap_xQTL <- count(AD_DNE_overlap_xQTL, gene_ID)
fwrite(
  x = gene_AD_DNE_overlap_xQTL,
  file = "gene_AD_DNE_overlap_xQTL.txt",
  sep = "\t"
)

In [None]:
## Annotate the gene list positions based on our v103 genomic reference
# Runs the `annotate_coord` workflow of gene_annotation.ipynb through `sos run`,
# with the AD gene-count table as --phenoFile and the GRCh38.103 GTF as the
# coordinate annotation; --cwd points at the AD_DNE project folder.
# NOTE(review): the gene-count table is built with count(gene_ID), so its ID
# column is named `gene_ID`, while --phenotype-id-column below is `gene_id`;
# confirm the workflow resolves this.
# NOTE(review): the leading `!` is IPython shell-escape syntax; presumably this
# cell is run from a shell/SoS context rather than the R kernel. Confirm.
!sos run xqtl-protocol/code/data_preprocessing/phenotype/gene_annotation.ipynb annotate_coord \
    --cwd project/image_QTL/CB_M1/AD_DNE \
    --phenoFile gene_AD_DNE_overlap_xQTL.txt \
    --phenotype-id-column gene_id \
    --molecular-trait-type gene \
    --coordinate-annotation data/resource/references/Homo_sapiens.GRCh38.103.chr.reformatted.collapse_only.gene.ERCC.gtf

In [51]:
# Read the tab-separated gene list from the AD_DNE annotation folder.
annotated_AD_dne_gene <- fread(
  "~/project/image_QTL/CB_M1/AD_DNE/gene_AD_DNE_overlap_xQTL.gene_list.tsv",
  sep = "\t"
)

In [52]:
# Distinct (ColocBoost event, gene) pairs from the AD overlap table.
coloc_outcome <- AD_DNE_overlap_xQTL |>
  select(event_ID.x, gene_ID) |>
  unique()

In [53]:
# Attach the ColocBoost event(s) to each annotated gene; the annotated table
# keys on `gene_id`, the pair table on `gene_ID`.
annotated_AD_dne_gene <- inner_join(
  annotated_AD_dne_gene,
  coloc_outcome,
  by = c("gene_id" = "gene_ID")
)

In [54]:
# Drop the join suffix: event_ID.x becomes event_ID.
annotated_AD_dne_gene <- rename(
  annotated_AD_dne_gene,
  event_ID = event_ID.x
)

In [55]:
# Write the annotated AD gene table (tab-separated); the path is relative to
# the current working directory.
fwrite(
  x = annotated_AD_dne_gene,
  file = "project/image_QTL/CB_M1/AD_DNE/annotated_AD_dne_gene.tsv",
  sep = "\t"
)

### PD_DNE

In [56]:
# Table dimensions and number of distinct variants in the PD subset.
PD_DNE |> dim()
PD_DNE[["variant_ID"]] |> unique() |> length()

In [57]:
# Keep PD ColocBoost rows whose variant also appears in combined_data.
# variant_ID is not a unique key on either side here, so the row fan-out is
# declared explicitly; this avoids dplyr's many-to-many warning and documents
# the intent. Other columns present in both tables get .x (ColocBoost) and
# .y (combined_data) suffixes.
PD_DNE_overlap_xQTL <- PD_DNE |>
  inner_join(combined_data, by = "variant_ID", relationship = "many-to-many")

In [58]:
# Dimensions of the joined table and number of distinct overlapping variants.
PD_DNE_overlap_xQTL |> dim()
PD_DNE_overlap_xQTL[["variant_ID"]] |> unique() |> length()


In [59]:
# Percentage 1561 out of 1561.
# NOTE(review): both counts are hard-coded, presumably copied from the outputs
# of the preceding cells; re-check them whenever the inputs change.
prop <- 1561 / 1561 * 100
prop

In [60]:
# Distinct ColocBoost-side event IDs (event_ID.x) among the overlapping rows.
PD_DNE_overlap_xQTL[["event_ID.x"]] |> unique()

In [61]:
# Save the PD overlap table as tab-separated text in the working directory.
fwrite(
  x = PD_DNE_overlap_xQTL,
  file = "PD_DNE_overlap_xQTL.tsv",
  sep = "\t"
)

In [62]:
# Number of overlap rows per gene_ID (columns: gene_ID, n), written as a
# tab-separated gene list in the working directory.
gene_PD_DNE_overlap_xQTL <- count(PD_DNE_overlap_xQTL, gene_ID)
fwrite(
  x = gene_PD_DNE_overlap_xQTL,
  file = "gene_PD_DNE_overlap_xQTL.txt",
  sep = "\t"
)

In [None]:
## Annotate the gene list positions based on our v103 genomic reference
# Runs the `annotate_coord` workflow of gene_annotation.ipynb through `sos run`,
# with the PD gene-count table as --phenoFile and the GRCh38.103 GTF as the
# coordinate annotation; --cwd points at the PD_DNE project folder.
# NOTE(review): the gene-count table is built with count(gene_ID), so its ID
# column is named `gene_ID`, while --phenotype-id-column below is `gene_id`;
# confirm the workflow resolves this.
# NOTE(review): the leading `!` is IPython shell-escape syntax; presumably this
# cell is run from a shell/SoS context rather than the R kernel. Confirm.
!sos run xqtl-protocol/code/data_preprocessing/phenotype/gene_annotation.ipynb annotate_coord \
    --cwd project/image_QTL/CB_M1/PD_DNE \
    --phenoFile gene_PD_DNE_overlap_xQTL.txt \
    --phenotype-id-column gene_id \
    --molecular-trait-type gene \
    --coordinate-annotation data/resource/references/Homo_sapiens.GRCh38.103.chr.reformatted.collapse_only.gene.ERCC.gtf

In [63]:
# Read the tab-separated gene list from the PD_DNE annotation folder.
annotated_PD_dne_gene <- fread(
  "~/project/image_QTL/CB_M1/PD_DNE/gene_PD_DNE_overlap_xQTL.gene_list.tsv",
  sep = "\t"
)

In [64]:
# Distinct (ColocBoost event, gene) pairs from the PD overlap table.
coloc_outcome <- PD_DNE_overlap_xQTL |>
  select(event_ID.x, gene_ID) |>
  unique()

In [65]:
# Attach the ColocBoost event(s) to each annotated gene; the annotated table
# keys on `gene_id`, the pair table on `gene_ID`.
annotated_PD_dne_gene <- inner_join(
  annotated_PD_dne_gene,
  coloc_outcome,
  by = c("gene_id" = "gene_ID")
)

In [66]:
# Drop the join suffix: event_ID.x becomes event_ID.
annotated_PD_dne_gene <- rename(
  annotated_PD_dne_gene,
  event_ID = event_ID.x
)

In [81]:
# Write the annotated PD gene table (tab-separated); the path is relative to
# the current working directory.
fwrite(
  x = annotated_PD_dne_gene,
  file = "project/image_QTL/CB_M1/PD_DNE/annotated_PD_dne_gene.tsv",
  sep = "\t"
)

### DNE

In [68]:
# Table dimensions and number of distinct variants across all image-trait rows.
DNE |> dim()
DNE[["variant_ID"]] |> unique() |> length()

In [69]:
# Keep image-trait ColocBoost rows whose variant also appears in combined_data.
# variant_ID is not a unique key on either side here, so the row fan-out is
# declared explicitly; this avoids dplyr's many-to-many warning and documents
# the intent. Other columns present in both tables get .x (ColocBoost) and
# .y (combined_data) suffixes.
DNE_overlap_xQTL <- DNE |>
  inner_join(combined_data, by = "variant_ID", relationship = "many-to-many")

In [70]:
# Dimensions of the joined table and number of distinct overlapping variants.
DNE_overlap_xQTL |> dim()
DNE_overlap_xQTL[["variant_ID"]] |> unique() |> length()


In [72]:
# Percentage 8782 out of 13925.
# NOTE(review): both counts are hard-coded, presumably copied from the outputs
# of the preceding cells; re-check them whenever the inputs change.
prop <- 8782 / 13925 * 100
prop

In [73]:
# Distinct ColocBoost-side event IDs (event_ID.x) among the overlapping rows.
DNE_overlap_xQTL[["event_ID.x"]] |> unique()

In [74]:
# Save the full image-trait overlap table as tab-separated text in the working
# directory.
fwrite(
  x = DNE_overlap_xQTL,
  file = "DNE_overlap_xQTL.tsv",
  sep = "\t"
)

In [75]:
# Number of overlap rows per gene_ID (columns: gene_ID, n), written as a
# tab-separated gene list in the working directory.
gene_DNE_overlap_xQTL <- count(DNE_overlap_xQTL, gene_ID)
fwrite(
  x = gene_DNE_overlap_xQTL,
  file = "gene_DNE_overlap_xQTL.txt",
  sep = "\t"
)

In [None]:
## Annotate the gene list positions based on our v103 genomic reference
# Runs the `annotate_coord` workflow of gene_annotation.ipynb through `sos run`,
# with the DNE gene-count table as --phenoFile and the GRCh38.103 GTF as the
# coordinate annotation; --cwd points at the DNE project folder.
# NOTE(review): the gene-count table is built with count(gene_ID), so its ID
# column is named `gene_ID`, while --phenotype-id-column below is `gene_id`;
# confirm the workflow resolves this.
# NOTE(review): the leading `!` is IPython shell-escape syntax; presumably this
# cell is run from a shell/SoS context rather than the R kernel. Confirm.
!sos run xqtl-protocol/code/data_preprocessing/phenotype/gene_annotation.ipynb annotate_coord \
    --cwd project/image_QTL/CB_M1/DNE \
    --phenoFile gene_DNE_overlap_xQTL.txt \
    --phenotype-id-column gene_id \
    --molecular-trait-type gene \
    --coordinate-annotation data/resource/references/Homo_sapiens.GRCh38.103.chr.reformatted.collapse_only.gene.ERCC.gtf

In [76]:
# Read the tab-separated gene list from the DNE annotation folder.
annotated_dne_gene <- fread(
  "~/project/image_QTL/CB_M1/DNE/gene_DNE_overlap_xQTL.gene_list.tsv",
  sep = "\t"
)

In [77]:
# Distinct (ColocBoost event, gene) pairs from the full overlap table.
coloc_outcome <- DNE_overlap_xQTL |>
  select(event_ID.x, gene_ID) |>
  unique()

In [78]:
# Attach the ColocBoost event(s) to each annotated gene; the annotated table
# keys on `gene_id`, the pair table on `gene_ID`.
annotated_dne_gene <- inner_join(
  annotated_dne_gene,
  coloc_outcome,
  by = c("gene_id" = "gene_ID")
)

In [79]:
# Drop the join suffix: event_ID.x becomes event_ID.
annotated_dne_gene <- rename(
  annotated_dne_gene,
  event_ID = event_ID.x
)

In [80]:
# Write the annotated gene table (tab-separated); the path is relative to the
# current working directory.
fwrite(
  x = annotated_dne_gene,
  file = "project/image_QTL/CB_M1/DNE/annotated_dne_gene.tsv",
  sep = "\t"
)