# Match up SNP and methylation files to be analyzed together

As part of this, we will determine which methylation sites have coverage in the SNP data and should be analyzed.

## Match files

In [1]:
library(stringr)

In [2]:
# Collect the per-chromosome genotype files. The pattern is anchored so that
# only file names ending in ".pgen" are picked up, not any name that merely
# contains "pgen".
SNP_files <- list.files("/expanse/lustre/projects/jhu152/naglemi/mwas/gwas/",
                        pattern = "\\.pgen$", full.names = TRUE)

# Put the paths in alphabetical order. Indexing with ordered(SNP_files) does not
# sort: a factor used as an index is read as its integer codes (the ranks), so
# it permutes by rank and only looks correct when the input is already sorted.
SNP_files <- sort(SNP_files)

# Split the paths by cohort tag found in the path ("libd" vs "ref_EUR").
SNP_files_libd <- data.frame(SNPs_libd = SNP_files[grepl("libd", SNP_files)])
SNP_files_ref_EUR <- data.frame(SNPs_ref_EUR = SNP_files[grepl("ref_EUR", SNP_files)])

# Chromosome number = the digits right after "chr". It is read from the file
# name only, so a "chr<digits>" in a parent directory can never win the match.
SNP_files_libd$Chr <- as.numeric(
  stringr::str_extract(basename(SNP_files_libd$SNPs_libd), "(?<=chr)\\d+")
)
SNP_files_ref_EUR$Chr <- as.numeric(
  stringr::str_extract(basename(SNP_files_ref_EUR$SNPs_ref_EUR), "(?<=chr)\\d+")
)

# One row per chromosome present in both sets; the join key is spelled out so an
# accidentally shared column name cannot change it.
SNP_files <- merge(SNP_files_libd, SNP_files_ref_EUR, by = "Chr")

In [3]:
# Recursively list every file under the pheno directory whose name contains
# "rda", returning full paths.
meth_files <- list.files(
  path = "/expanse/lustre/projects/jhu152/naglemi/mwas/pheno",
  pattern = "rda",
  full.names = TRUE,
  recursive = TRUE
)

In [4]:
# One single-column table of file paths per region, selected by the region tag
# appearing anywhere in the path.
meth_caud <- data.frame(meth_caud = grep("caud", meth_files, value = TRUE))
meth_dlpfc <- data.frame(meth_dlpfc = grep("dlpfc", meth_files, value = TRUE))
meth_hippo <- data.frame(meth_hippo = grep("hippo", meth_files, value = TRUE))

In [5]:
# Chromosome number = the digits between "chr" and the "_" that follows it in
# each path, stored as a numeric Chr column.
meth_caud[["Chr"]] <- as.numeric(
  stringr::str_extract(meth_caud[["meth_caud"]], "(?<=chr)\\d+(?=_)")
)
meth_dlpfc[["Chr"]] <- as.numeric(
  stringr::str_extract(meth_dlpfc[["meth_dlpfc"]], "(?<=chr)\\d+(?=_)")
)
meth_hippo[["Chr"]] <- as.numeric(
  stringr::str_extract(meth_hippo[["meth_hippo"]], "(?<=chr)\\d+(?=_)")
)

In [6]:
# Turn a long table of methylation file paths for one region into a wide table
# with one row per chromosome.
#
# data:   data frame whose first column holds the file paths and which has a
#         `Chr` column.
# region: region tag as it appears in the paths (e.g. "caud"); also used as the
#         prefix of the output column names.
#
# Returns the reshaped data frame: `Chr` first, then one path column per file
# type, named "<region>_<type>".
reshape_and_rename_corrected <- function(data, region) {
  # The file type (AA, EA or all) is the token between "chr<N>_" and ".rda"
  type_regex <- paste0(".*", region, "/out/chr\\d+_(AA|EA|all)\\.rda")
  data$Type <- gsub(type_regex, "\\1", data[[1]])

  # Spread the types into separate columns, keyed by chromosome
  wide_data <- reshape(data, idvar = "Chr", timevar = "Type", direction = "wide")

  # reshape() names the new columns "<original>.<type>"; keep only the type
  # suffix and prefix it with the region
  reshaped_names <- colnames(wide_data)[-1]
  type_suffix <- gsub(".*\\.(AA|EA|all)$", "\\1", reshaped_names)
  colnames(wide_data)[-1] <- paste(region, type_suffix, sep = "_")

  wide_data
}

# Build the wide (one row per chromosome) file table for each region
meth_caud_wide <- reshape_and_rename_corrected(data = meth_caud, region = "caud")
meth_dlpfc_wide <- reshape_and_rename_corrected(data = meth_dlpfc, region = "dlpfc")
meth_hippo_wide <- reshape_and_rename_corrected(data = meth_hippo, region = "hippo")

# Full outer join of the three tables on chromosome, so a chromosome missing
# from one region is kept with NA in that region's columns
merged_data <- merge(
  merge(meth_caud_wide, meth_dlpfc_wide, by = "Chr", all = TRUE),
  meth_hippo_wide,
  by = "Chr", all = TRUE
)


In [7]:
# Inner join of the SNP file table and the methylation file table: only
# chromosomes present in both are kept. The key is named explicitly (as in the
# region merge above) so that any other column the two tables might come to
# share cannot silently become part of the join key.
merged <- merge(SNP_files, merged_data, by = "Chr")

In [8]:
merged

Chr,SNPs_libd,SNPs_ref_EUR,caud_AA,caud_all,caud_EA,dlpfc_AA,dlpfc_all,dlpfc_EA,hippo_AA,hippo_all,hippo_EA
<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr1.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//ref_EUR_chr1.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr1_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr1_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr1_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr1_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr1_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr1_EA.rda
2,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr2.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//ref_EUR_chr2.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr2_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr2_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr2_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr2_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr2_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr2_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr2_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr2_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr2_EA.rda
3,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr3.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//ref_EUR_chr3.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr3_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr3_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr3_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr3_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr3_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr3_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr3_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr3_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr3_EA.rda
4,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr4.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//ref_EUR_chr4.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr4_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr4_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr4_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr4_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr4_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr4_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr4_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr4_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr4_EA.rda
5,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr5.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//ref_EUR_chr5.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr5_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr5_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr5_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr5_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr5_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr5_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr5_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr5_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr5_EA.rda
6,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr6.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//ref_EUR_chr6.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr6_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr6_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr6_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr6_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr6_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr6_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr6_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr6_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr6_EA.rda
7,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr7.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//ref_EUR_chr7.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr7_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr7_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr7_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr7_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr7_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr7_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr7_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr7_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr7_EA.rda
8,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr8.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//ref_EUR_chr8.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr8_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr8_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr8_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr8_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr8_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr8_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr8_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr8_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr8_EA.rda
9,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr9.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//ref_EUR_chr9.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr9_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr9_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr9_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr9_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr9_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr9_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr9_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr9_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr9_EA.rda
10,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr10.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//ref_EUR_chr10.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr10_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr10_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr10_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr10_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr10_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr10_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr10_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr10_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr10_EA.rda


In [9]:
# Save the matched SNP / methylation file table (one row per chromosome) as CSV
data.table::fwrite(merged, "09-OUT_matched_SNP_meth_files.csv")

## Determine the first and last methylation positions with SNP coverage for each file

In [10]:
# Drop the ref_EUR SNP file column; the cells below only use the libd SNP files
merged$SNPs_ref_EUR <- NULL

In [11]:
head(merged)

Unnamed: 0_level_0,Chr,SNPs_libd,caud_AA,caud_all,caud_EA,dlpfc_AA,dlpfc_all,dlpfc_EA,hippo_AA,hippo_all,hippo_EA
Unnamed: 0_level_1,<dbl>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>,<chr>
1,1,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr1.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr1_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr1_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr1_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr1_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr1_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr1_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr1_EA.rda
2,2,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr2.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr2_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr2_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr2_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr2_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr2_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr2_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr2_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr2_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr2_EA.rda
3,3,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr3.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr3_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr3_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr3_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr3_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr3_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr3_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr3_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr3_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr3_EA.rda
4,4,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr4.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr4_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr4_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr4_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr4_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr4_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr4_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr4_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr4_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr4_EA.rda
5,5,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr5.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr5_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr5_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr5_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr5_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr5_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr5_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr5_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr5_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr5_EA.rda
6,6,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr6.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr6_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr6_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr6_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr6_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr6_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/dlpfc/out/chr6_EA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr6_AA.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr6_all.rda,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/hippo/out/chr6_EA.rda


In [12]:
# Load data.table
library(data.table)

# Convert 'merged' to a data.table in place
setDT(merged)

# First attempt at a wide-to-long reshape: pair the SNP column with the
# methylation columns through two measure-variable patterns, keeping Chr as id.
# NOTE(review): the first pattern matches a single column while the second
# matches nine, so the two groups have unequal length -- confirm how melt()
# fills SNP_data for rows beyond the first group member. `long_format` is
# rebuilt from scratch in the next cell, so this result is not what is used
# further down.
long_format <- melt(
  merged,
  id.vars = "Chr",
  measure.vars = patterns("^SNPs_libd", "caud_.*|dlpfc_.*|hippo_.*"),
  value.name = c("SNP_data", "methylation_data"),
  variable.name = "type"
)

# Relabel 'type' as "pgen" when it mentions SNPs_libd, otherwise "rda".
# NOTE(review): the printed head below shows "rda" on rows that hold both an
# SNP path and a methylation path, so this label does not separate the two
# kinds of data -- presumably 'type' holds a melt index, not a column name.
long_format[, type := ifelse(grepl("SNPs_libd", type), "pgen", "rda")]

# View the head of the long format table
head(long_format)

Chr,type,SNP_data,methylation_data
<dbl>,<chr>,<chr>,<chr>
1,rda,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr1.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr1_AA.rda
2,rda,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr2.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr2_AA.rda
3,rda,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr3.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr3_AA.rda
4,rda,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr4.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr4_AA.rda
5,rda,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr5.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr5_AA.rda
6,rda,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr6.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr6_AA.rda


In [13]:
# data.table provides melt(), set() and setcolorder() used below
library(data.table)

# 'merged' is expected to be a data.table already (see setDT(merged) earlier)

# Names of the methylation file columns (one per region / type combination)
methylation_columns <- names(merged)[grepl("caud_|dlpfc_|hippo_", names(merged))]

# Stack the methylation columns: one row per (chromosome, methylation file)
long_format <- melt(
  merged,
  id.vars = "Chr",
  measure.vars = methylation_columns,
  variable.name = "Methylation_Type",
  value.name = "methylation_data"
)

# Look up the SNP file of each row's chromosome and attach it as SNP_data
set(
  long_format,
  j = "SNP_data",
  value = merged[long_format, on = "Chr", SNPs_libd]
)

# The originating column name is not kept; leave only the three columns of
# interest, in this order
set(long_format, j = "Methylation_Type", value = NULL)
setcolorder(long_format, c("Chr", "SNP_data", "methylation_data"))

# Preview
head(long_format)


Chr,SNP_data,methylation_data
<dbl>,<chr>,<chr>
1,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr1.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr1_AA.rda
2,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr2.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr2_AA.rda
3,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr3.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr3_AA.rda
4,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr4.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr4_AA.rda
5,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr5.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr5_AA.rda
6,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr6.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr6_AA.rda


In [14]:
# Quick arithmetic check.
# NOTE(review): presumably the expected number of rows in long_format
# (22 chromosomes x 9 methylation files per chromosome) -- confirm.
22*9

In [15]:
library(bsseq)

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unsplit, which.max, which.min


Loading required package: GenomicRanges

Loading required package: stats4

Loading required package: S4Vectors


Attaching package: ‘S4Vectors’


The following objects are masked from ‘package:data.table’:

    first, second


The following object is masked from ‘package:utils’:

    findMatches


The following objects are masked from ‘package:base’:

    expand.grid, I, unname

### Test on first row

In [16]:
i <- 1

In [27]:
# Dry run of the coverage scan on one SNP / methylation file pair (row i of
# long_format).

# The variant table sits next to the genotype file. Only the trailing ".pgen"
# extension is swapped, so a "pgen" elsewhere in the path is left untouched.
pvar <- fread(sub("\\.pgen$", ".pvar", long_format$SNP_data[i]))[, 1:3]

# load() restores the saved objects into the workspace; everything below relies
# on one of them being named BSobj2.
load(long_format$methylation_data[i])
meth_positions <- start(BSobj2)

# "First" and "last" index are only meaningful for sorted positions, so verify
# the ordering before deriving anything from it (previously this ran last,
# after the indices had already been computed).
if (is.unsorted(meth_positions)) {
  stop("start positions in BSobj2 are not ordered")
}

first_SNP_position <- min(pvar$POS)
last_SNP_position <- max(pvar$POS)
first_meth_position <- min(meth_positions)
last_meth_position <- max(meth_positions)

# A methylation site counts as covered when it lies within window_size bp of
# the span between the first and last SNP.
window_size <- 10000

# Indices of all covered methylation sites, computed once and reused for both
# ends of the range.
covered_meth_indices <- which(
  meth_positions >= (first_SNP_position - window_size) &
    meth_positions <= (last_SNP_position + window_size)
)

if (length(covered_meth_indices) > 0) {
  first_meth_index_with_SNP_coverage <- covered_meth_indices[1]
  last_meth_index_with_SNP_coverage <-
    covered_meth_indices[length(covered_meth_indices)]
  first_meth_value_with_SNP_coverage <-
    meth_positions[first_meth_index_with_SNP_coverage]
  last_meth_value_with_SNP_coverage <-
    meth_positions[last_meth_index_with_SNP_coverage]
} else {
  # No methylation site falls inside the SNP window
  first_meth_index_with_SNP_coverage <- NA
  last_meth_index_with_SNP_coverage <- NA
  first_meth_value_with_SNP_coverage <- NA
  last_meth_value_with_SNP_coverage <- NA
}

# BSobj2 is deliberately left loaded: the sanity-check cells that follow call
# start(BSobj2), which fails once the object has been set to NULL.


ERROR: Error in eval(expr, envir, enclos): start positions in BSobj2 are not ordered


### Sanity checks

In [29]:
head(start(BSobj2))

In [23]:
first_meth_value_with_SNP_coverage

In [24]:
last_meth_value_with_SNP_coverage

In [25]:
first_meth_index_with_SNP_coverage

In [26]:
last_meth_index_with_SNP_coverage

In [33]:
start(BSobj2)[first_meth_index_with_SNP_coverage]

In [34]:
start(BSobj2)[last_meth_index_with_SNP_coverage]

In [20]:
head(start(BSobj2))

### Deploy

In [40]:
# Coverage scan over every SNP / methylation file pair in long_format.
# For each row, records the index and genomic position of the first and last
# methylation site lying within window_size bp of the SNP span of that
# chromosome (NA when no site qualifies).

# A methylation site counts as covered when it lies within window_size bp of
# the span between the first and last SNP. Loop-invariant, so set once.
window_size <- 10000

# Result columns, created as integers up front so rows can be filled by
# reference. The order matches the column order the chained assignment used to
# produce, keeping the layout of the written CSV unchanged.
result_cols <- c(
  "last_meth_value_with_SNP_coverage",
  "first_meth_value_with_SNP_coverage",
  "last_meth_index_with_SNP_coverage",
  "first_meth_index_with_SNP_coverage"
)
long_format[, (result_cols) := NA_integer_]

# seq_len() rather than 1:nrow(): an empty table must give zero iterations
for (i in seq_len(nrow(long_format))) {
  print(i)

  # The variant table sits next to the genotype file. Only the trailing ".pgen"
  # extension is swapped, so a "pgen" elsewhere in the path is left untouched.
  pvar <- fread(sub("\\.pgen$", ".pvar", long_format$SNP_data[i]))[, 1:3]

  # load() returns the names of the objects it restored; fail clearly if the
  # expected BSobj2 is not among them.
  loaded_objects <- load(long_format$methylation_data[i])
  if (!"BSobj2" %in% loaded_objects) {
    stop("no object named BSobj2 in ", long_format$methylation_data[i])
  }

  # Only the start positions are needed, so release the large object right away
  meth_positions <- start(BSobj2)
  BSobj2 <- NULL

  # "First" and "last" index are only meaningful for sorted positions
  if (is.unsorted(meth_positions)) {
    stop("start positions in BSobj2 are not ordered")
  }

  first_SNP_position <- min(pvar$POS)
  last_SNP_position <- max(pvar$POS)

  # Indices of all covered methylation sites, computed once and reused for both
  # ends of the range
  covered_meth_indices <- which(
    meth_positions >= (first_SNP_position - window_size) &
      meth_positions <= (last_SNP_position + window_size)
  )

  if (length(covered_meth_indices) > 0) {
    first_meth_index_with_SNP_coverage <- covered_meth_indices[1]
    last_meth_index_with_SNP_coverage <-
      covered_meth_indices[length(covered_meth_indices)]
    first_meth_value_with_SNP_coverage <-
      meth_positions[first_meth_index_with_SNP_coverage]
    last_meth_value_with_SNP_coverage <-
      meth_positions[last_meth_index_with_SNP_coverage]
  } else {
    # No methylation site falls inside the SNP window
    first_meth_index_with_SNP_coverage <- NA_integer_
    last_meth_index_with_SNP_coverage <- NA_integer_
    first_meth_value_with_SNP_coverage <- NA_integer_
    last_meth_value_with_SNP_coverage <- NA_integer_
  }

  # Fill row i by reference; assigning through `$<-`[i] would copy the table on
  # every iteration. Values are listed in the same order as result_cols.
  data.table::set(
    long_format,
    i = i,
    j = result_cols,
    value = list(
      as.integer(last_meth_value_with_SNP_coverage),
      as.integer(first_meth_value_with_SNP_coverage),
      as.integer(last_meth_index_with_SNP_coverage),
      as.integer(first_meth_index_with_SNP_coverage)
    )
  )
}

[1] 1
[1] 2
[1] 3
[1] 4
[1] 5
[1] 6
[1] 7
[1] 8
[1] 9
[1] 10
[1] 11
[1] 12
[1] 13
[1] 14
[1] 15
[1] 16
[1] 17
[1] 18
[1] 19
[1] 20
[1] 21
[1] 22
[1] 23
[1] 24
[1] 25
[1] 26
[1] 27
[1] 28
[1] 29
[1] 30
[1] 31
[1] 32
[1] 33
[1] 34
[1] 35
[1] 36
[1] 37
[1] 38
[1] 39
[1] 40
[1] 41
[1] 42
[1] 43
[1] 44
[1] 45
[1] 46
[1] 47
[1] 48
[1] 49
[1] 50
[1] 51
[1] 52
[1] 53
[1] 54
[1] 55
[1] 56
[1] 57
[1] 58
[1] 59
[1] 60
[1] 61
[1] 62
[1] 63
[1] 64
[1] 65
[1] 66
[1] 67
[1] 68
[1] 69
[1] 70
[1] 71
[1] 72
[1] 73
[1] 74
[1] 75
[1] 76
[1] 77
[1] 78
[1] 79
[1] 80
[1] 81
[1] 82
[1] 83
[1] 84
[1] 85
[1] 86
[1] 87
[1] 88
[1] 89
[1] 90
[1] 91
[1] 92
[1] 93
[1] 94
[1] 95
[1] 96
[1] 97
[1] 98
[1] 99
[1] 100
[1] 101
[1] 102
[1] 103
[1] 104
[1] 105
[1] 106
[1] 107
[1] 108
[1] 109
[1] 110
[1] 111
[1] 112
[1] 113
[1] 114
[1] 115
[1] 116
[1] 117
[1] 118
[1] 119
[1] 120
[1] 121
[1] 122
[1] 123
[1] 124
[1] 125
[1] 126
[1] 127
[1] 128
[1] 129
[1] 130
[1] 131
[1] 132
[1] 133
[1] 134
[1] 135
[1] 136
[1] 137
[1] 138
[1] 

In [41]:
long_format

Chr,SNP_data,methylation_data,last_meth_value_with_SNP_coverage,first_meth_value_with_SNP_coverage,last_meth_index_with_SNP_coverage,first_meth_index_with_SNP_coverage
<dbl>,<chr>,<chr>,<int>,<int>,<int>,<int>
1,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr1.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr1_AA.rda,248918358,1069461,2202702,8982
2,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr2.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr2_AA.rda,241863783,10001,2019984,1
3,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr3.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr3_AA.rda,198099789,11602,1538467,1
4,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr4.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr4_AA.rda,189877411,69399,1387731,1
5,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr5.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr5_AA.rda,181172584,44104,1409038,1
6,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr6.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr6_AA.rda,170619093,192453,1412543,1138
7,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr7.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr7_AA.rda,159334659,49742,1490198,1
8,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr8.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr8_AA.rda,145078546,196751,1225856,1483
9,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr9.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr9_AA.rda,136932307,175723,1064212,2234
10,/expanse/lustre/projects/jhu152/naglemi/mwas/gwas//libd_chr10.pgen,/expanse/lustre/projects/jhu152/naglemi/mwas/pheno/caud/out/chr10_AA.rda,133625493,45719,1288155,1


In [42]:
# Save the file pairs together with the first / last covered methylation index
# and position computed above
fwrite(long_format, "09-OUT_matched_files_and_indices_to_test.csv")