# Fine-mapping of PD-related risk loci in European summary statistics
* Project: Cross-ancestry PAR
* Version: R/4.4
* Status: Complete
* Last Updated: 13-FEB-2025

## Notebook overview
* Extract the variants that fall within the chromosome and base-pair boundaries of each selected locus from the summary statistics
* Perform fine-mapping and save results

In [21]:
library("data.table")
#if (!requireNamespace("BiocManager", quietly = TRUE))
#    install.packages("BiocManager")
#BiocManager::install("snpStats")
library("robustbase")
library(ggplot2)
library(tidyr)
devtools::install_github("chr1swallace/coloc")
library("coloc")
library("tidyverse")
library("readr")

Skipping install of 'coloc' from a github remote, the SHA1 (fd1c0351) has not changed since last install.
  Use `force = TRUE` to force installation



In [22]:
## Read the summary statistics table
df0 <- fread("{WORK_DIR}/PD/summary_stats/META5_all_with_rsid_hg38.txt", header = TRUE)
# Copy the chromosome column under the upper-case name `CHR`, which is the
# name the per-locus region filters in this notebook refer to.
df0$CHR <- df0$Chr

In [23]:
# Preview the first six rows to confirm the expected columns were read
head(df0, n = 6L)

MarkerName,Allele1,Allele2,Freq1,FreqSE,MinFreq,MaxFreq,Effect,StdErr,P-value,Direction,HetISq,HetChiSq,HetDf,HetPVal,freqSpan,ID,Chr,BP,CHR
<chr>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<chr>,<dbl>,<dbl>,<int>,<dbl>,<dbl>,<chr>,<int>,<int>,<int>
chr10:98240868,a,g,0.5665,0.0067,0.5525,0.5943,0.011,0.0095,0.2476,+++++++-++----+-+,0.0,15.14,16,0.5144,0.0418,rs7899632,10,98240868,10
chr10:98240888,a,c,0.7953,0.008,0.7208,0.8111,-0.0091,0.0116,0.4295,-+++-------+-+-+-,0.0,12.625,16,0.6999,0.0903,rs61875309,10,98240888,10
chr10:98242110,t,c,0.014,0.0017,0.0044,0.0178,-0.0152,0.0649,0.8147,+??+--?++??+-++--,0.0,8.126,11,0.7019,0.0134,rs150203744,10,98242110,10
chr10:98242642,a,g,0.0018,0.0005,0.0014,0.0043,-0.1331,0.1778,0.4541,-?+--?+??+??-?+??,0.0,4.328,7,0.7413,0.0029,rs8181398,10,98242642,10
chr10:98242707,t,c,0.9868,0.002,0.9792,0.9912,0.0347,0.0742,0.6396,+???--?++??+---++,0.0,8.593,10,0.5711,0.012,rs111551711,10,98242707,10
chr10:98243485,t,g,0.8819,0.0069,0.8582,0.8985,-0.0011,0.0149,0.9423,+---+++-++++-++-+,10.1,17.807,16,0.3353,0.0403,rs12258651,10,98243485,10


In [5]:
## EXTRACT CHRS
# One row per locus: name, chromosome, and the start/end base-pair
# positions of the region to extract. Coordinates are presumably on the
# same genome build as the summary statistics file -- TODO confirm.
locus_regions <- read.table(
    header = TRUE,
    colClasses = c("character", "integer", "integer", "integer"),
    text = "
gene          chr  start      end
ASXL3         18   33578219   33751195
BAG3          10   119651380  119677819
BIN3          8    22620418   22669148
BRIP1         17   61679139   61863559
BST1          4    15703065   15738313
C5orf24       5    134845680  134859735
CAB39L        13   49308650   49444064
CAMK2D        4    113418054  113761927
CASC16        16   52552084   52652132
CD19          16   28931965   28939342
CHD9          16   53054991   53329150
CHRNB1        17   7445061    7457710
CLCN3         4    169612633  169723673
CRHR1         17   45784277   45835828
CRLS1         20   6006093    6040053
CTSB          8    11842524   11869533
DLG2          11   83455012   85628335
DNAH17        17   78423697   78577396
DYRK1A        21   37365573   37526358
ELOVL7        5    60751791   60844274
FAM171A2      17   44353215   44363853
FAM47E        4    76214040   76283783
FAM47E_STBD1  4    76251721   76311129
FAM49B        8    129839593  130017504
FBRSL1        12   132489551  132585188
FCGR2A        1    161505430  161524013
FGF20         8    16992181   17002345
FYN           6    111660332  111873452
GAK           4    849276     932373
GALC          14   87837820   87993665
GBAP1         1    155214368  155218874
GBF1          10   102245371  102382899
GCH1          14   54842008   54902826
GPNMB         7    23235967   23275108
GS1_124K5_11  7    66530305   66592407
HIP1R         12   122834453  122862961
HLA_DRB5      6    32517353   32530287
IGSF9B        11   133896438  133956968
INPP5F        10   119726042  119829147
IP6K2         3    48688003   48740353
ITGA8         10   15513954   15719922
ITPKB         1    226631690  226739323
KCNIP3        2    95297327   95386077
KCNS3         2    17877847   18361616
KPNA1         3    122421902  122514945
KRTCAP2       1    155169408  155173475
LCORL         4    17841187   18021876
LINC00693     3    28575278   28758337
LOC100131289  6    27761744   27763187
LRRK2         12   40196744   40369285
MAP4K4        2    101696850  101894690
MBNL2         13   97221434   97394120
MCCC1         3    183015218  183116075
MED12L        3    151085286  151437072
MEX3C         18   51174550   51218333
MIPOL1        14   37197894   37579125
NOD2          16   50693588   50733077
NUCKS1        1    205712822  205750182
PAM           5    102753981  103029730
PMVK          1    154924740  154936719
RAB29         1    205767986  205775482
RETREG3       17   42579513   42610623
RIMS1         6    71886550   72403150
RIT2          18   42743227   43115691
RNF141        11   10511673   10541230
RPS12         6    132814569  132817564
RPS6KL1       14   74903951   74923302
SATB1         3    18345377   18445621
SCAF11        12   45919131   45992120
SCARB2        4    76158737   76234536
SETD1A        16   30957294   30984664
SH3GL2        9    17579066   17797124
SIPA1L2       1    232397965  232630571
SNCA          4    89700345   89838315
SPPL2B        19   2328615    2355095
SPTSSB        3    161344798  161372880
STK39         2    167954020  168247595
SYT17         16   19167971   19268332
TMEM163       2    134455759  134719000
TMEM175       4    932387     958656
TRIM40        6    30136124   30148735
UBAP2         9    33921693   34049388
UBTF          17   44205033   44221626
VAMP4         1    171700160  171742074
VPS13C        15   61852389   62060473
WNT3          17   46762506   46833154
"
)

# For every locus, keep the rows of df0 on the matching chromosome whose
# position lies strictly between start and end (both bounds are excluded),
# and store them in a variable named <gene>_sumstats.
for (locus_idx in seq_len(nrow(locus_regions))) {
    in_region <- df0$CHR == locus_regions$chr[locus_idx] &
        df0$BP > locus_regions$start[locus_idx] &
        df0$BP < locus_regions$end[locus_idx]
    # which() drops NA comparisons, so rows with a missing CHR or BP are
    # excluded rather than selected.
    assign(
        paste0(locus_regions$gene[locus_idx], "_sumstats"),
        df0[which(in_region), ]
    )
}

In [6]:
## Run for genes
# Locus names; each one is combined with "_sumstats" to look up the
# extracted table for that locus.
genes <- c(
    "ASXL3", "BAG3", "BIN3", "BRIP1", "BST1", "C5orf24", "CAB39L",
    "CAMK2D", "CASC16", "CD19", "CHD9", "CHRNB1", "CLCN3", "CRHR1",
    "CRLS1", "CTSB", "DLG2", "DNAH17", "DYRK1A", "ELOVL7", "FAM171A2",
    "FAM47E", "FAM47E_STBD1", "FAM49B", "FBRSL1", "FCGR2A", "FGF20",
    "FYN", "GAK", "GALC", "GBAP1", "GBF1", "GCH1", "GPNMB",
    "GS1_124K5_11", "HIP1R", "HLA_DRB5", "IGSF9B", "INPP5F", "IP6K2",
    "ITGA8", "ITPKB", "KCNIP3", "KCNS3", "KPNA1", "KRTCAP2", "LCORL",
    "LINC00693", "LOC100131289", "LRRK2", "MAP4K4", "MBNL2", "MCCC1",
    "MED12L", "MEX3C", "MIPOL1", "NOD2", "NUCKS1", "PAM", "PMVK",
    "RAB29", "RETREG3", "RIMS1", "RIT2", "RNF141", "RPS12", "RPS6KL1",
    "SATB1", "SCAF11", "SCARB2", "SETD1A", "SH3GL2", "SIPA1L2", "SNCA",
    "SPPL2B", "SPTSSB", "STK39", "SYT17", "TMEM163", "TMEM175", "TRIM40",
    "UBAP2", "UBTF", "VAMP4", "VPS13C", "WNT3"
)

In [8]:
# Write the extracted variants of every locus to a tab-separated file named
# <gene>_variants.tab.
for (gene in genes) {
    # get0() returns NULL when no object of that name exists, whereas get()
    # would raise an error and the warning branch below could never run.
    gene_sumstats <- get0(paste0(gene, "_sumstats"), ifnotfound = NULL)
    if (is.data.frame(gene_sumstats)) {
        write_tsv(gene_sumstats, paste0("{WORK_DIR}/PAR/", gene, "_variants.tab"))
    } else {
        warning("No data frame found for ", gene, call. = FALSE)
    }
}

In [9]:
## Run for genes
# Locus names; each one is the prefix of the per-locus input and output
# file names.
genes <- c(
    "ASXL3", "BAG3", "BIN3", "BRIP1", "BST1", "C5orf24", "CAB39L",
    "CAMK2D", "CASC16", "CD19", "CHD9", "CHRNB1", "CLCN3", "CRHR1",
    "CRLS1", "CTSB", "DLG2", "DNAH17", "DYRK1A", "ELOVL7", "FAM171A2",
    "FAM47E", "FAM47E_STBD1", "FAM49B", "FBRSL1", "FCGR2A", "FGF20",
    "FYN", "GAK", "GALC", "GBAP1", "GBF1", "GCH1", "GPNMB",
    "GS1_124K5_11", "HIP1R", "HLA_DRB5", "IGSF9B", "INPP5F", "IP6K2",
    "ITGA8", "ITPKB", "KCNIP3", "KCNS3", "KPNA1", "KRTCAP2", "LCORL",
    "LINC00693", "LOC100131289", "LRRK2", "MAP4K4", "MBNL2", "MCCC1",
    "MED12L", "MEX3C", "MIPOL1", "NOD2", "NUCKS1", "PAM", "PMVK",
    "RAB29", "RETREG3", "RIMS1", "RIT2", "RNF141", "RPS12", "RPS6KL1",
    "SATB1", "SCAF11", "SCARB2", "SETD1A", "SH3GL2", "SIPA1L2", "SNCA",
    "SPPL2B", "SPTSSB", "STK39", "SYT17", "TMEM163", "TMEM175", "TRIM40",
    "UBAP2", "UBTF", "VAMP4", "VPS13C", "WNT3"
)

In [10]:
# Convert each locus' variant file into the four-column table
# (SNP, beta, P, varbeta) that the fine-mapping step reads.
for (gene in genes) {
    variants_path <- paste0("{WORK_DIR}/PAR/", gene, "_variants.tab")
    coloc_input_path <- paste0("{WORK_DIR}/PAR/", gene, "_Nalls_2019.csv")

    # Load the locus variants and keep only the first row per MarkerName
    variants <- fread(variants_path, header = TRUE, sep = "\t")
    variants <- variants[!duplicated(variants$MarkerName), ]

    # varbeta is the squared standard error of the effect estimate
    coloc_input <- data.table(
        SNP = variants$ID,
        beta = variants$Effect,
        P = variants[["P-value"]],
        varbeta = variants$StdErr^2
    )

    # Note: the file carries a .csv extension but is written tab-separated
    fwrite(coloc_input, file = coloc_input_path, na = "NA", quote = FALSE, row.names = FALSE, sep = "\t")
}

In [11]:
## Run for genes
# Locus names; each one is the prefix of the per-locus fine-mapping input
# and result file names.
genes <- c(
    "ASXL3", "BAG3", "BIN3", "BRIP1", "BST1", "C5orf24", "CAB39L",
    "CAMK2D", "CASC16", "CD19", "CHD9", "CHRNB1", "CLCN3", "CRHR1",
    "CRLS1", "CTSB", "DLG2", "DNAH17", "DYRK1A", "ELOVL7", "FAM171A2",
    "FAM47E", "FAM47E_STBD1", "FAM49B", "FBRSL1", "FCGR2A", "FGF20",
    "FYN", "GAK", "GALC", "GBAP1", "GBF1", "GCH1", "GPNMB",
    "GS1_124K5_11", "HIP1R", "HLA_DRB5", "IGSF9B", "INPP5F", "IP6K2",
    "ITGA8", "ITPKB", "KCNIP3", "KCNS3", "KPNA1", "KRTCAP2", "LCORL",
    "LINC00693", "LOC100131289", "LRRK2", "MAP4K4", "MBNL2", "MCCC1",
    "MED12L", "MEX3C", "MIPOL1", "NOD2", "NUCKS1", "PAM", "PMVK",
    "RAB29", "RETREG3", "RIMS1", "RIT2", "RNF141", "RPS12", "RPS6KL1",
    "SATB1", "SCAF11", "SCARB2", "SETD1A", "SH3GL2", "SIPA1L2", "SNCA",
    "SPPL2B", "SPTSSB", "STK39", "SYT17", "TMEM163", "TMEM175", "TRIM40",
    "UBAP2", "UBTF", "VAMP4", "VPS13C", "WNT3"
)

In [14]:
# Study-level constants passed to coloc; identical for every locus.
# Total sample size: 1,474,097 (37,688 cases, 18,618 UKB proxy-cases and
# 1,417,791 controls - Nalls et al 2019).
n_samples <- 1474097
# Proportion of samples that are cases; presumably
# (37,688 + 18,618) / 1,474,097, i.e. proxy-cases counted as cases --
# TODO confirm.
case_fraction <- 0.038

# Run single-variant fine-mapping for each locus and save the per-SNP
# results, joined with the input columns, to
# <gene>_results_fine_map_Nalls.csv.
for (gene in genes) {
    input_file <- paste0("{WORK_DIR}/PAR/", gene, "_Nalls_2019.csv")
    output <- fread(input_file, header = TRUE, sep = "\t")

    # Nothing to fine-map when no variants were extracted for this locus
    if (nrow(output) == 0) {
        cat("No rows in output for gene: ", gene, ". Skipping...\n")
        next  # Skip to the next gene in the loop
    }

    # Dataset in the list format expected by coloc (case-control study
    # described by beta and var(beta))
    dataset <- list(
        snp = output$SNP,
        beta = output$beta,
        varbeta = output$varbeta,
        N = n_samples,
        s = case_fraction,
        type = "cc"
    )

    # p1 is the prior probability that a SNP is associated with the trait
    # (it is not a p-value threshold)
    results <- finemap.abf(dataset = dataset, p1 = 1e-04)

    # Check if results has 0 rows
    if (nrow(results) == 0) {
        cat("No results returned for gene: ", gene, ". Skipping...\n")
        next  # Skip to the next gene in the loop
    }

    # Drop the final row of the result (the extra null-model row coloc
    # appends after the per-SNP rows). head(x, -1L) is safe even when only
    # one row is present, unlike 1:(nrow(x) - 1).
    per_snp <- head(results, -1L)

    # cbind() pairs rows purely by position, so refuse to continue if the
    # fine-mapping output is not in the same SNP order as the input.
    if (!identical(as.character(per_snp$snp), as.character(output$SNP))) {
        stop("finemap.abf() output is not aligned with the input SNPs for ", gene, call. = FALSE)
    }
    combo <- cbind(per_snp, output)

    # Save every SNP of the locus; no SNP.PP filter is applied here
    final_output_file <- paste0("{WORK_DIR}/PAR/", gene, "_results_fine_map_Nalls.csv")
    fwrite(combo, file = final_output_file, na = "NA", quote = FALSE, row.names = FALSE, sep = ",")
}

“minimum p value is: 0.00058867
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check you supplied var(beta) and not sd(beta) for the varbeta argument. If that's not the explanation, please check the 02_data vignette.”
“minimum p value is: 0.015375
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check you supplied var(beta) and not sd(beta) for the varbeta argument. If that's not the explanation, please check the 02_data vignette.”
“minimum p value is: 0.019274
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check you supplied var(beta) and not sd(beta) for the varbeta argument. If that's not the explanation, please check the 02_data vignette.”
“minimum p value is: 0.08461
If this is what you expected, this is not a problem.
If this is not as small as you expected, please check you supplied var(beta) and not sd(beta) for the varb

In [20]:
# Define the directory containing the CSV files
input_directory <- "{WORK_DIR}/PAR/"  # Replace with your directory path

# List all files ending in "_results_fine_map_Nalls.csv" in the directory.
# The dot is escaped so it matches a literal "." only, in line with the
# gsub() pattern used below.
file_list <- list.files(input_directory, pattern = "_results_fine_map_Nalls\\.csv$", full.names = TRUE)

# Initialize an empty list to store the results
results_list <- list()

# Loop through each file
for (result_path in file_list) {
  # Extract the gene name from the file name (remove the
  # "_results_fine_map_Nalls.csv" suffix)
  gene <- gsub("_results_fine_map_Nalls\\.csv$", "", basename(result_path))

  # Read the CSV file
  fine_map <- read.csv(result_path)

  # Select the SNP with the highest SNP.PP value; tied SNPs are all kept
  best_snp <- fine_map %>%
    slice_max(SNP.PP, n = 1) %>%  # Select row(s) with the max SNP.PP
    mutate(gene = gene)           # Add the gene name

  # Store the result under the gene name
  results_list[[gene]] <- best_snp
}

# Combine all results into a single dataframe
final_results <- bind_rows(results_list)

# Export the results to a CSV file (relative path: written to the current
# working directory)
output_file <- "top_snp_per_gene_eur.csv"  # Desired output file name
write.csv(final_results, output_file, row.names = FALSE)

# Print the first few rows of the final results
print(head(final_results))

          V.        z.        r.     lABF.         snp prior      SNP.PP
1 0.00008836  5.648936 0.9977959 12.861362   rs1941685 1e-04 0.240894448
2 0.00192721  9.457859 0.9540344 41.129778 rs144814361 1e-04 0.999999944
3 0.00009604  5.673469 0.9976048 13.038444   rs2280104 1e-04 0.275087369
4 0.00017956  6.119403 0.9955311 15.934570  rs61169879 1e-04 0.274200364
5 0.00008836 11.010638 0.9977959 57.424759   rs4698412 1e-04 0.387052888
6 0.00121104  3.436782 0.9706137  3.968573 rs116661206 1e-04 0.005269649
          SNP   beta         P    varbeta    gene
1   rs1941685 0.0531 1.689e-08 0.00008836   ASXL3
2 rs144814361 0.4152 3.267e-21 0.00192721    BAG3
3   rs2280104 0.0556 1.164e-08 0.00009604    BIN3
4  rs61169879 0.0820 9.276e-10 0.00017956   BRIP1
5   rs4698412 0.1035 2.058e-28 0.00008836    BST1
6 rs116661206 0.1196 5.872e-04 0.00121104 C5orf24
