# Fine-mapping of AD-related risk loci in European summary statistics
* Project: Cross-ancestry PAR
* Version: R/4.4
* Status: Complete
* Last Updated: 13-FEB-2025

## Notebook overview
* Extract chromosome and base pair positions from summary statistics for selected loci
* Perform fine-mapping and save results

In [1]:
library("data.table")
#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("snpStats")
library("robustbase")
library(ggplot2)
library(tidyr)
#devtools::install_github("chr1swallace/coloc")
library("coloc")
library("tidyverse")
library("readr")

This is coloc version 5.2.3

── [1mAttaching core tidyverse packages[22m ────────────────────────── tidyverse 2.0.0 ──
[32m✔[39m [34mdplyr    [39m 1.1.4     [32m✔[39m [34mreadr    [39m 2.1.5
[32m✔[39m [34mforcats  [39m 1.0.0     [32m✔[39m [34mstringr  [39m 1.5.1
[32m✔[39m [34mlubridate[39m 1.9.3     [32m✔[39m [34mtibble   [39m 3.2.1
[32m✔[39m [34mpurrr    [39m 1.0.2     
── [1mConflicts[22m ──────────────────────────────────────────── tidyverse_conflicts() ──
[31m✖[39m [34mdplyr[39m::[32mbetween()[39m     masks [34mdata.table[39m::between()
[31m✖[39m [34mdplyr[39m::[32mfilter()[39m      masks [34mstats[39m::filter()
[31m✖[39m [34mdplyr[39m::[32mfirst()[39m       masks [34mdata.table[39m::first()
[31m✖[39m [34mlubridate[39m::[32mhour()[39m    masks [34mdata.table[39m::hour()
[31m✖[39m [34mlubridate[39m::[32misoweek()[39m masks [34mdata.table[39m::isoweek()
[31m✖[39m [34mdplyr[39m::[32mlag()[39m         masks 

In [3]:
## Read the GWAS summary statistics (gzipped TSV with a header row) into df0.
df0 <- fread(
  "{WORK_DIR}/AD/summary_stats/Bellenguez_2022/noProxy/EADB-minus-UKB_Nov2022.tsv.gz",
  header = TRUE
)

In [4]:
# Preview the first six rows to check column names and types.
head(df0, n = 6L)

MarkerName,CHR,BP,p_value,effect_allele,other_allele,effect_allele_frequency,beta,standard_error,n_cases,n_controls,het_isq,het_pvalue
<chr>,<int>,<int>,<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<int>,<int>,<dbl>,<dbl>
chr11:12541586:A:G,11,12541586,0.9486,A,G,0.3748,0.0007,0.0106,36659,63137,32.7,0.1371
chr8:102803998:G:A,8,102803998,0.1991,A,G,0.0026,-0.1579,0.123,32228,42115,0.0,0.7297
chr13:55101557:T:C,13,55101557,0.5626,T,C,0.9856,-0.0252,0.0436,36227,62057,9.0,0.3599
chr7:17885109:C:G,7,17885109,0.01901,C,G,0.9989,0.5638,0.2404,23331,28992,0.0,0.6406
chr3:97729697:A:G,3,97729697,0.8134,A,G,0.0183,-0.0126,0.0532,36131,56626,0.0,0.9542
chr19:22932213:G:C,19,22932213,0.8503,C,G,0.0008,0.0531,0.2812,26798,28624,0.0,0.4073


In [5]:
# Re-derive chromosome and position from MarkerName ("chr<CHR>:<BP>:<REF>:<ALT>").
# separate() returns character columns and replaces the existing CHR/BP
# columns, so BP is converted back to integer here; otherwise the later
# `BP > lower & BP < upper` window filters would compare strings
# lexicographically instead of numerically.
# CHR is left as a character string with the "chr" prefix removed.
df0 <- df0 %>%
  separate(
    MarkerName,
    into = c("CHR", "BP", "REF", "ALT"),
    sep = ":",
    remove = FALSE
  ) %>%
  mutate(
    CHR = str_remove(CHR, "chr"),
    BP = as.integer(BP)
  ) %>%
  select(-REF, -ALT)

# Copies of the effect columns under the names used by the export cells.
df0$Effect <- df0$beta
df0$StdErr <- df0$standard_error
df0$`P-value` <- df0$p_value

In [8]:
## EXTRACT LOCI - 100KB up & downstream from GWAS hits
# Each locus is described by its chromosome and the position(s) of its lead
# variant(s); names of `hits` are the rsIDs. A locus with two hits gets the
# union of the two windows. Every window is (hit - 100 kb, hit + 100 kb),
# exclusive at both ends.
loci <- list(
  APOE = list(chr = 19L, hits = c(rs429358 = 44908684L, rs7412 = 44908822L)),
  CR1 = list(chr = 1L, hits = c(rs679515 = 207577223L)),
  BIN1 = list(chr = 2L, hits = c(rs6733839 = 127135234L)),
  INPP5D = list(chr = 2L, hits = c(rs10933431 = 233117202L)),
  MAPT = list(chr = 17L, hits = c(rs199515 = 46779275L)),
  CLNK = list(chr = 4L, hits = c(rs6846529 = 11023507L)),
  # HLA-DQA1
  HLA = list(chr = 6L, hits = c(rs6605556 = 32615322L)),
  UNC5CL = list(chr = 6L, hits = c(rs10947943 = 41036354L)),
  TREM2 = list(chr = 6L, hits = c(rs143442484 = 41161469L, rs75932628 = 41161514L)),
  TREML2 = list(chr = 6L, hits = c(rs60755019 = 41181270L)),
  CD2AP = list(chr = 6L, hits = c(rs7767350 = 47517390L)),
  EPDR1 = list(chr = 7L, hits = c(rs6966331 = 37844191L)),
  SPDYE3 = list(chr = 7L, hits = c(rs7384878 = 100334426L)),
  EPHA1 = list(chr = 7L, hits = c(rs11771145 = 143413669L)),
  PTK2B = list(chr = 8L, hits = c(rs73223431 = 27362470L)),
  CLU = list(chr = 8L, hits = c(rs11787077 = 27607795L)),
  USP6NL = list(chr = 10L, hits = c(rs7912495 = 11676714L)),
  SPI1 = list(chr = 11L, hits = c(rs10437655 = 47370397L)),
  MS4A4A = list(chr = 11L, hits = c(rs1582763 = 60254475L)),
  PICALM = list(chr = 11L, hits = c(rs3851179 = 86157598L)),
  SORL1 = list(chr = 11L, hits = c(rs74685827 = 121482368L, rs11218343 = 121564878L)),
  FERMT2 = list(chr = 14L, hits = c(rs17125924 = 52924962L)),
  SLC24A4 = list(chr = 14L, hits = c(rs7401792 = 92464917L, rs12590654 = 92472511L)),
  SPPL2A = list(chr = 15L, hits = c(rs8025980 = 50701814L)),
  ADAM10 = list(chr = 15L, hits = c(rs602602 = 58764824L)),
  APH1B = list(chr = 15L, hits = c(rs117618017 = 63277703L)),
  BCKDK = list(chr = 16L, hits = c(rs889555 = 31111250L)),
  IL34 = list(chr = 16L, hits = c(rs4985556 = 70660097L)),
  PLCG2 = list(chr = 16L, hits = c(rs12446759 = 81739398L, rs72824905 = 81908423L)),
  SCIMP = list(chr = 17L, hits = c(rs7225151 = 5233752L)),
  ABI3 = list(chr = 17L, hits = c(rs616338 = 49219935L)),
  TSPOAP1 = list(chr = 17L, hits = c(rs2526377 = 58332680L)),
  ACE = list(chr = 17L, hits = c(rs4277405 = 63471557L)),
  ABCA7 = list(chr = 19L, hits = c(rs12151021 = 1050875L)),
  CASS4 = list(chr = 20L, hits = c(rs6014724 = 56423488L)),
  ADAMTS1 = list(chr = 21L, hits = c(rs2830489 = 26775872L)),
  SORT1 = list(chr = 1L, hits = c(rs141749679 = 109345810L)),
  ADAM17 = list(chr = 2L, hits = c(rs72777026 = 9558882L)),
  PRKD3 = list(chr = 2L, hits = c(rs17020490 = 37304796L)),
  NCK2 = list(chr = 2L, hits = c(rs143080277 = 105749599L)),
  WDR12 = list(chr = 2L, hits = c(rs139643391 = 202878716L)),
  MME = list(chr = 3L, hits = c(rs16824536 = 155069722L, rs61762319 = 155084189L)),
  IDUA = list(chr = 4L, hits = c(rs3822030 = 993555L)),
  RHOH = list(chr = 4L, hits = c(rs2245466 = 40197226L)),
  ANKH = list(chr = 5L, hits = c(rs112403360 = 14724304L)),
  COX7C = list(chr = 5L, hits = c(rs62374257 = 86927378L)),
  TNIP1 = list(chr = 5L, hits = c(rs871269 = 151052827L)),
  RASGEF1C = list(chr = 5L, hits = c(rs113706587 = 180201150L)),
  HS3ST5 = list(chr = 6L, hits = c(rs785129 = 114291731L)),
  UMAD1 = list(chr = 7L, hits = c(rs6943429 = 7817263L)),
  ICA1 = list(chr = 7L, hits = c(rs10952097 = 8204382L)),
  TMEM106B = list(chr = 7L, hits = c(rs13237518 = 12229967L)),
  JAZF1 = list(chr = 7L, hits = c(rs1160871 = 28129126L)),
  SEC61G = list(chr = 7L, hits = c(rs76928645 = 54873635L)),
  CTSB = list(chr = 8L, hits = c(rs1065712 = 11844613L)),
  SHARPIN = list(chr = 8L, hits = c(rs34173062 = 144103704L)),
  ABCA1 = list(chr = 9L, hits = c(rs1800978 = 104903697L)),
  ANK3 = list(chr = 10L, hits = c(rs7068231 = 60025170L)),
  TSPAN14 = list(chr = 10L, hits = c(rs6586028 = 80494228L)),
  BLNK = list(chr = 10L, hits = c(rs6584063 = 96266650L)),
  PLEKHA1 = list(chr = 10L, hits = c(rs7908662 = 122413396L)),
  TPCN1 = list(chr = 12L, hits = c(rs6489896 = 113281983L)),
  # IGH gene cluster (chromosome 14)
  IGH = list(chr = 14L, hits = c(rs7157106 = 105761758L, rs10131280 = 106665591L)),
  SNX1 = list(chr = 15L, hits = c(rs3848143 = 64131307L)),
  CTSH = list(chr = 15L, hits = c(rs12592898 = 78936857L)),
  DOC2A = list(chr = 16L, hits = c(rs1140239 = 30010081L)),
  MAF = list(chr = 16L, hits = c(rs450674 = 79574511L)),
  FOXF1 = list(chr = 16L, hits = c(rs16941239 = 86420604L)),
  WDR81 = list(chr = 17L, hits = c(rs35048651 = 1728046L)),
  MYO15A = list(chr = 17L, hits = c(rs2242595 = 18156140L)),
  GRN = list(chr = 17L, hits = c(rs5848 = 44352876L)),
  KLF16 = list(chr = 19L, hits = c(rs149080927 = 1854254L)),
  SIGLEC11 = list(chr = 19L, hits = c(rs9304690 = 49950060L)),
  LILRB2 = list(chr = 19L, hits = c(rs587709 = 54267597L)),
  RBCK1 = list(chr = 20L, hits = c(rs1358782 = 413334L)),
  SLC2A4RG = list(chr = 20L, hits = c(rs6742 = 63743088L)),
  APP = list(chr = 21L, hits = c(rs2154481 = 26101558L))
)

# Return the rows of `sumstats` on chromosome `chr` whose BP lies strictly
# within `flank` base pairs of at least one position in `hits`.
#   sumstats: table with CHR and BP columns
#   chr:      chromosome label (compared as text, so 19 and "19" both work)
#   hits:     integer vector of lead-variant positions
#   flank:    half-width of the window in base pairs
extract_locus <- function(sumstats, chr, hits, flank = 100000L) {
  # Coerce BP so the comparison is numeric even if BP was stored as text.
  bp <- as.integer(sumstats[["BP"]])
  in_window <- Reduce(
    `|`,
    lapply(hits, function(hit) bp > hit - flank & bp < hit + flank)
  )
  on_chr <- as.character(sumstats[["CHR"]]) == as.character(chr)
  # which() drops rows whose condition is NA, as subset() would.
  row_idx <- which(on_chr & in_window)
  sumstats[row_idx, ]
}

# Create one `<LOCUS>_sumstats` table per locus; later cells look these up
# by name.
for (locus in names(loci)) {
  assign(
    paste0(locus, "_sumstats"),
    extract_locus(df0, loci[[locus]]$chr, loci[[locus]]$hits)
  )
}

In [9]:
## Loci to process; each name must have a matching `<name>_sumstats` table
genes <- c(
  "APOE", "CR1", "BIN1", "INPP5D", "MAPT", "CLNK", "HLA", "UNC5CL",
  "TREM2", "TREML2", "CD2AP", "EPDR1", "SPDYE3", "EPHA1", "PTK2B", "CLU",
  "USP6NL", "SPI1", "MS4A4A", "PICALM", "SORL1", "FERMT2", "SLC24A4",
  "SPPL2A", "ADAM10", "APH1B", "BCKDK", "IL34", "PLCG2", "SCIMP", "ABI3",
  "TSPOAP1", "ACE", "ABCA7", "CASS4", "ADAMTS1", "SORT1", "ADAM17",
  "PRKD3", "NCK2", "WDR12", "MME", "IDUA", "RHOH", "ANKH", "COX7C",
  "TNIP1", "RASGEF1C", "HS3ST5", "UMAD1", "ICA1", "TMEM106B", "JAZF1",
  "SEC61G", "CTSB", "SHARPIN", "ABCA1", "ANK3", "TSPAN14", "BLNK",
  "PLEKHA1", "TPCN1", "IGH", "SNX1", "CTSH", "DOC2A", "MAF", "FOXF1",
  "WDR81", "MYO15A", "GRN", "KLF16", "SIGLEC11", "LILRB2", "RBCK1",
  "SLC2A4RG", "APP"
)

In [10]:
# Write each locus table (`<gene>_sumstats`) to a tab-separated file.
for (gene in genes) {
  # get0() returns NULL when the object does not exist instead of raising an
  # error, so a missing locus reaches the warning below and the loop goes on.
  gene_sumstats <- get0(paste0(gene, "_sumstats"), ifnotfound = NULL)
  if (is.data.frame(gene_sumstats)) {
    write_tsv(
      gene_sumstats,
      paste0("{WORK_DIR}/PAR/", gene, "_ad_variants_eur.tab")
    )
  } else {
    warning("No data frame found for ", gene, call. = FALSE)
  }
}

In [11]:
## Loci to process in the export step (same list as the extraction step)
genes <- c(
  "APOE", "CR1", "BIN1", "INPP5D", "MAPT", "CLNK", "HLA", "UNC5CL",
  "TREM2", "TREML2", "CD2AP", "EPDR1", "SPDYE3", "EPHA1", "PTK2B", "CLU",
  "USP6NL", "SPI1", "MS4A4A", "PICALM", "SORL1", "FERMT2", "SLC24A4",
  "SPPL2A", "ADAM10", "APH1B", "BCKDK", "IL34", "PLCG2", "SCIMP", "ABI3",
  "TSPOAP1", "ACE", "ABCA7", "CASS4", "ADAMTS1", "SORT1", "ADAM17",
  "PRKD3", "NCK2", "WDR12", "MME", "IDUA", "RHOH", "ANKH", "COX7C",
  "TNIP1", "RASGEF1C", "HS3ST5", "UMAD1", "ICA1", "TMEM106B", "JAZF1",
  "SEC61G", "CTSB", "SHARPIN", "ABCA1", "ANK3", "TSPAN14", "BLNK",
  "PLEKHA1", "TPCN1", "IGH", "SNX1", "CTSH", "DOC2A", "MAF", "FOXF1",
  "WDR81", "MYO15A", "GRN", "KLF16", "SIGLEC11", "LILRB2", "RBCK1",
  "SLC2A4RG", "APP"
)

In [12]:
# Turn each locus extract into the four-column table (SNP, beta, P, varbeta)
# that the fine-mapping step reads.
for (gene in genes) {
  input_file <- paste0("{WORK_DIR}/PAR/", gene, "_ad_variants_eur.tab")
  output_file <- paste0("{WORK_DIR}/PAR/", gene, "_Bellenguez_2022.csv")

  # Load the locus extract and keep the first row for each MarkerName.
  dataset1 <- fread(input_file, header = TRUE, sep = "\t")
  dataset1 <- dataset1[!duplicated(dataset1$MarkerName), ]

  # Keep only the renamed columns; varbeta is the squared standard error.
  output <- dataset1 %>%
    transmute(
      SNP = MarkerName,
      beta = Effect,
      P = `P-value`,
      varbeta = StdErr^2
    )

  # The file is tab-separated despite its .csv extension.
  fwrite(
    output,
    file = output_file,
    na = "NA",
    quote = FALSE,
    row.names = FALSE,
    sep = "\t"
  )
}

In [13]:
## Loci to fine-map (same list as the extraction and export steps)
genes <- c(
  "APOE", "CR1", "BIN1", "INPP5D", "MAPT", "CLNK", "HLA", "UNC5CL",
  "TREM2", "TREML2", "CD2AP", "EPDR1", "SPDYE3", "EPHA1", "PTK2B", "CLU",
  "USP6NL", "SPI1", "MS4A4A", "PICALM", "SORL1", "FERMT2", "SLC24A4",
  "SPPL2A", "ADAM10", "APH1B", "BCKDK", "IL34", "PLCG2", "SCIMP", "ABI3",
  "TSPOAP1", "ACE", "ABCA7", "CASS4", "ADAMTS1", "SORT1", "ADAM17",
  "PRKD3", "NCK2", "WDR12", "MME", "IDUA", "RHOH", "ANKH", "COX7C",
  "TNIP1", "RASGEF1C", "HS3ST5", "UMAD1", "ICA1", "TMEM106B", "JAZF1",
  "SEC61G", "CTSB", "SHARPIN", "ABCA1", "ANK3", "TSPAN14", "BLNK",
  "PLEKHA1", "TPCN1", "IGH", "SNX1", "CTSH", "DOC2A", "MAF", "FOXF1",
  "WDR81", "MYO15A", "GRN", "KLF16", "SIGLEC11", "LILRB2", "RBCK1",
  "SLC2A4RG", "APP"
)

In [14]:
# Fine-map every locus with coloc's finemap.abf() and save the per-variant
# results next to the input columns.

# Study-level inputs passed to finemap.abf(); they are the same for all loci.
# 39,106 AD cases vs 440,683 total (39,106 cases, 401,577 controls -
# Bellenguez et al 2022)
# NOTE(review): the head(df0) preview shows per-variant n_cases/n_controls of
# roughly 36k/63k for this noProxy file - confirm N and s describe this dataset.
N <- 440683
s <- 0.089 # proportion of samples that are cases
type <- "cc" # case-control trait

for (gene in genes) {
  input_file <- paste0("{WORK_DIR}/PAR/", gene, "_Bellenguez_2022.csv")
  output <- fread(input_file, header = TRUE, sep = "\t")

  # A locus with no variants cannot be fine-mapped.
  if (nrow(output) == 0) {
    cat("No rows in output for gene: ", gene, ". Skipping...\n")
    next # Skip to the next gene in the loop
  }

  # Dataset in the list format expected by finemap.abf().
  dataset <- list(
    snp = output$SNP,
    beta = output$beta,
    varbeta = output$varbeta,
    N = N,
    s = s,
    type = type
  )

  # p1 is the prior probability that a SNP is associated with the trait
  # (it is not a p-value threshold).
  results <- finemap.abf(dataset = dataset, p1 = 1e-04)

  # The last row of the result is not a variant, so it is dropped. head(, -1)
  # also behaves correctly when that row is the only one.
  snp_results <- head(results, -1L)
  if (nrow(snp_results) == 0) {
    cat("No results returned for gene: ", gene, ". Skipping...\n")
    next # Skip to the next gene in the loop
  }

  # Attach the input columns, matching on SNP id so rows stay aligned even if
  # the two tables are not in the same order.
  input_rows <- match(snp_results$snp, output$SNP)
  combo <- cbind(snp_results, output[input_rows, ])

  # Save all variants; no SNP.PP threshold is applied here.
  final_output_file <- paste0(
    "{WORK_DIR}/PAR/", gene, "_results_fine_map_Bellenguez.csv"
  )
  fwrite(
    combo,
    file = final_output_file,
    na = "NA",
    quote = FALSE,
    row.names = FALSE,
    sep = ","
  )
}

“minimum p value is: 0.00012938
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check you supplied var(beta) and not sd(beta) for the varbeta argument. If that's not the explanation, please check the 02_data vignette.”
“minimum p value is: 0.0067688
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check you supplied var(beta) and not sd(beta) for the varbeta argument. If that's not the explanation, please check the 02_data vignette.”
“minimum p value is: 1.4749e-06
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check you supplied var(beta) and not sd(beta) for the varbeta argument. If that's not the explanation, please check the 02_data vignette.”
“minimum p value is: 0.00062158
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check you supplied var(beta) and not sd(beta) for th

In [15]:
# Collect the variant with the highest posterior probability (SNP.PP) from
# every per-locus fine-mapping result file and write one summary table.

# Directory containing the result files
input_directory <- "{WORK_DIR}/PAR/"  # Replace with your directory path

# File-name suffix of the result files, as a regular expression. The dot is
# escaped so it matches a literal "."; the same pattern is used both to list
# the files and to strip the suffix.
result_suffix <- "_results_fine_map_Bellenguez\\.csv$"

# Every matching file in the directory is included, whichever run wrote it.
file_list <- list.files(
  input_directory,
  pattern = result_suffix,
  full.names = TRUE
)

# Locus name = file name without the suffix
locus_names <- sub(result_suffix, "", basename(file_list))

# One table per locus holding its top row(s); slice_max() keeps ties, so a
# locus can contribute more than one row.
results_list <- Map(
  function(path, locus_name) {
    read.csv(path) %>%
      slice_max(SNP.PP, n = 1) %>%
      mutate(gene = locus_name)
  },
  file_list,
  locus_names
)
names(results_list) <- locus_names

# Combine all results into a single dataframe
final_results <- bind_rows(results_list)

# Export the results to a CSV file in the current working directory
output_file <- "top_snp_per_gene_eur_ad.csv"
write.csv(final_results, output_file, row.names = FALSE)

# Print the first few rows of the final results
print(head(final_results))

          V.        z.        r.     lABF.                 snp prior     SNP.PP
1 0.00023409 -5.228758 0.9941818 11.017039 chr9:104903754:G:GC 1e-04 0.37584804
2 0.00011664 11.351852 0.9970925 61.324705   chr19:1050875:A:G 1e-04 1.00000000
3 0.00267289  5.365571 0.9373633 12.107837  chr17:49219935:T:C 1e-04 0.93810071
4 0.00010201 -6.138614 0.9974562 15.806307  chr17:63483402:T:C 1e-04 0.16476466
5 0.00013689 -6.427350 0.9965894 17.744533 chr15:58790585:TA:T 1e-04 0.55954096
6 0.00015876 -4.373016 0.9960467  6.757233    chr2:9474089:G:T 1e-04 0.03536693
                  SNP    beta         P    varbeta   gene
1 chr9:104903754:G:GC -0.0800 1.759e-07 0.00023409  ABCA1
2   chr19:1050875:A:G  0.1226 7.692e-30 0.00011664  ABCA7
3  chr17:49219935:T:C  0.2774 8.180e-08 0.00267289   ABI3
4  chr17:63483402:T:C -0.0620 8.077e-10 0.00010201    ACE
5 chr15:58790585:TA:T -0.0752 1.306e-10 0.00013689 ADAM10
6    chr2:9474089:G:T -0.0551 1.193e-05 0.00015876 ADAM17
