# Take a close look at extreme p-values

In [1]:
library(data.table)

In [2]:
# bigdata <- fread("16a9par-OUT_stage2_MWAS_scz.csv")

# smallerdata <- bigdata[which(bigdata$p < 10^-200), ]

# dim(smallerdata)

# fwrite(smallerdata, "22_intermediate_extreme_stage2_MWAS_scz.csv")

In [3]:
smallerdata <- fread("22_intermediate_extreme_stage2_MWAS_scz.csv")

### What are the stage 1 weights for SNPs contributing to this CpG site?

In [4]:
weights <- readRDS("/expanse/lustre/projects/jhu152/naglemi/mwas/CpGWAS/output_EXPANSE_a2_caud/libd_chr1-chr1_AA-libd_chr1-chr1_AA-908982-928981-dynamic-1corestotal-allcorepera-20240415-134429.rds")

In [5]:
smallerdata <- smallerdata[which(smallerdata$population == "AA" & smallerdata$region == "caud"), ]

In [6]:
library(data.table)

#smallerdata_sorted <- smallerdata[order(smallerdata$pos), ]
#duplicated_pos <- smallerdata_sorted$pos[duplicated(smallerdata_sorted$pos)]
#smallerdata_sorted[smallerdata_sorted$pos %in% duplicated_pos, ]

smallerdata[, timestamp := sub(".*-(\\d{8}-\\d{6})$", "\\1", scaff)]
smallerdata <- smallerdata[order(pos, -timestamp)]

smallerdata <- smallerdata[, .SD[1], by = pos]

In [7]:
methylation_positions <- vector("list", length(weights@models))
snp_weights_list_of_lists <- vector("list", length(weights@models))
desired_methylation_positions <- vector("list", sum(smallerdata$pos %in% sapply(weights@models, function(x) x@methylationPosition)))
desired_snp_weights_list_of_lists <- vector("list", sum(smallerdata$pos %in% sapply(weights@models, function(x) x@methylationPosition)))
desired_snp_pvals <- vector("list", sum(smallerdata$pos %in% sapply(weights@models, function(x) x@methylationPosition)))

In [8]:
pos_index <- 1

for(i in seq_along(weights@models)){
    methylation_positions[[i]] <- weights@models[[i]]@methylationPosition
    snp_weights_list_of_lists[[i]] <- weights@models[[i]]@snpWeights

    if(weights@models[[i]]@methylationPosition %in% smallerdata$pos){
        desired_methylation_positions[[pos_index]] <- weights@models[[i]]@methylationPosition
        if(is.null(weights@models[[i]]@snpWeights)) stop("problem")
        desired_snp_weights_list_of_lists[[pos_index]] <- weights@models[[i]]@snpWeights
        desired_snp_pvals[[pos_index]] <- smallerdata$p[which(smallerdata$pos == weights@models[[i]]@methylationPosition)]
        pos_index <- pos_index + 1
    }
}

#methylation_positions <- unlist(methylation_positions)
#desired_methylation_positions <- unlist(desired_methylation_positions)
#desired_snp_pvals <- unlist(desired_snp_pvals)

In [9]:
desired_methylation_positions

In [10]:
desired_snp_pvals

In [11]:
# Combine all elements into a single data.table
df <- data.table(
  methylation_position = rep(desired_methylation_positions, lengths(desired_snp_weights_list_of_lists)),
  pval_mwas_stage2 = rep(desired_snp_pvals, lengths(desired_snp_weights_list_of_lists)),
  contributing_snp_name = unlist(lapply(desired_snp_weights_list_of_lists, function(x) sapply(strsplit(names(x), ":"), function(y) paste(y[1:4], collapse=":")))),
  contributing_snp_weight_mwas_stage1_prediction = unlist(desired_snp_weights_list_of_lists)
)


In [12]:
library(stringr)

In [13]:
df$contributing_snp_pos <- as.numeric(str_split_fixed(df$contributing_snp_name, ":", 3)[, 2])

In [14]:
head(df)

methylation_position,pval_mwas_stage2,contributing_snp_name,contributing_snp_weight_mwas_stage1_prediction,contributing_snp_pos
<list>,<list>,<chr>,<dbl>,<dbl>
73274305,0,chr1:73265462:C:T,-0.000396641,73265462
73274305,0,chr1:73269720:G:C,0.0004947975,73269720
73274305,0,chr1:73271206:C:T,-0.0004841998,73271206
73274305,0,chr1:73276935:T:G,-0.0004460208,73276935
73274305,0,chr1:73278190:A:G,0.000490016,73278190
73274305,0,chr1:73281554:T:C,-0.000422409,73281554


### What are the summary stat weights and SE for the same SNPs?

In [15]:
library(data.table)

In [16]:
ss_path <- "/home/naglemi/mwas/gwas/gwas_stat_scz"

In [17]:
snp.gwas2 <- fread(ss_path, skip = 1, header = FALSE)
colnames(snp.gwas2) <- strsplit(readLines(ss_path, n = 1), "\t")[[1]]

In [18]:
snp.gwas2 <- snp.gwas2[which(snp.gwas2$CHR == 1), ]

In [19]:
colnames(snp.gwas2) <- paste0("ss_", colnames(snp.gwas2))

In [20]:
colnames(snp.gwas2)[which(colnames(snp.gwas2)  == "ss_BP")] <- "contributing_snp_pos"

In [21]:
merged <- merge(df, snp.gwas2, by = "contributing_snp_pos")

In [22]:
# make sure we have same allele orientation in both. TRUE if so.
merged$contributing_snp_name_verified <- paste0("chr1:",
                                                merged$contributing_snp_pos,
                                                ":",
                                                merged$ss_A1,
                                                ":",
                                                merged$ss_A2)

all(merged$contributing_snp_name_verified == merged$contributing_snp_name)

In [23]:
# should all be unique
contributing_snp_pos_methylation_position_combo <- paste0(merged$contributing_snp_pos,
                                                          "_",
                                                          merged$methylation_position)
length(contributing_snp_pos_methylation_position_combo) == length(unique(contributing_snp_pos_methylation_position_combo))

In [24]:
view_cols <- c("contributing_snp_pos", "contributing_snp_name", "contributing_snp_name_verified", "ss_CHR", "ss_SNP", "ss_A1", "ss_A2")
#merged[, ..view_cols]


### Recompute the stage 2 result

### First we need the SNP data

#### Load all SNP data

In [25]:
library(CpGWAS)

In [26]:
paths <- list(pvar_path = "/expanse/lustre/projects/jhu152/naglemi/mwas/gwas/ref_EUR_chr1.pvar",
              pgen_path = "/expanse/lustre/projects/jhu152/naglemi/mwas/gwas/ref_EUR_chr1.pgen",
              psam_path = "/expanse/lustre/projects/jhu152/naglemi/mwas/gwas/ref_EUR_chr1.psam")

my_SNPs <- loadSNPData(paths$pvar_path, paths$pgen_path, paths$psam_path)

Now let's practice subsetting the SNP data to get the ones we want and need for a given methylation site

#### Get the SNP data for specific SNPs contributing to methylation site

##### What are those SNPs?

In [27]:
library(pgenlibr)

In [28]:
i <- 1

In [29]:
this_desired_methylation_position <- unlist(desired_methylation_positions[i])

In [30]:
this_desired_methylation_position

In [31]:
typeof(merged$methylation_position)
typeof(this_desired_methylation_position)

In [32]:
merged_this_methylation_position <- merged[methylation_position == this_desired_methylation_position, ]

In [33]:
snp_indices_of_interest <- which(my_SNPs$pvar_dt$POS %in% merged_this_methylation_position$contributing_snp_pos)

##### Extract and format the SNPs

In [34]:
G <- pgenlibr::ReadList(my_SNPs$pgen,
                        variant_subset = snp_indices_of_interest)

In [35]:
dim(G)

In [36]:
colnames(G) <- my_SNPs$pvar_dt$ID[snp_indices_of_interest]
rownames(G) <- my_SNPs$psam$IID

In [37]:
dim(G)

In [38]:
G

Unnamed: 0,rs6672818,rs61765637,rs12759031,rs11210191,rs11210193,rs7513593,rs11210195
HG00096,1,1,1,1,1,1,1
HG00097,0,0,0,0,0,0,0
HG00099,2,2,2,2,2,2,2
HG00101,1,1,1,1,1,1,1
HG00102,1,1,1,1,1,1,1
HG00103,2,2,2,2,2,2,2
HG00105,2,2,2,2,2,2,2
HG00107,1,1,1,1,1,1,1
HG00108,2,2,2,2,2,2,2
HG00109,1,1,1,1,1,1,1


### stage 2 MWAS

In [39]:
merged_this_methylation_position$logOR <- log(merged_this_methylation_position$ss_OR)
# merged_this_methylation_position$SElogOR <- merged_this_methylation_position$ss_SE / merged_this_methylation_position$ss_OR

# wrong way to compute it - no actually right
merged_this_methylation_position$z <- merged_this_methylation_position$logOR / merged_this_methylation_position$ss_SE

# The (right?) way to compute it? - no wrong
# merged_this_methylation_position$z <- merged_this_methylation_position$logOR / merged_this_methylation_position$SElogOR

In [40]:
merged_this_methylation_position$ss_SE

In [41]:
merged_this_methylation_position

contributing_snp_pos,methylation_position,pval_mwas_stage2,contributing_snp_name,contributing_snp_weight_mwas_stage1_prediction,ss_CHR,ss_SNP,ss_A1,ss_A2,ss_FRQ_A_53386,⋯,ss_Direction,ss_HetISqt,ss_HetDf,ss_HetPVa,ss_Nca,ss_Nco,ss_Neff,contributing_snp_name_verified,logOR,z
<int>,<list>,<list>,<chr>,<dbl>,<int>,<chr>,<chr>,<chr>,<dbl>,⋯,<chr>,<dbl>,<int>,<dbl>,<int>,<int>,<dbl>,<chr>,<dbl>,<dbl>
73265462,73274305,0,chr1:73265462:C:T,-0.000396641,1,rs6672818,C,T,0.483,⋯,+-++-+-+-+-++-++-+-++++++-++++++-+-++-+-++++-++-+-++--+++-++++++-++++++++---,14.3,75,0.1531,53386,77258,58749.13,chr1:73265462:C:T,-0.06230103,-7.244305
73269720,73274305,0,chr1:73269720:G:C,0.0004947975,1,rs61765637,G,C,0.523,⋯,-+--+-+-+-++-++-+-+------+-+----+-+--+-+----+--+-+--++---+-----++--------+++,14.9,75,0.1418,53386,77258,58749.13,chr1:73269720:G:C,0.06240187,7.256031
73271206,73274305,0,chr1:73271206:C:T,-0.0004841998,1,rs12759031,C,T,0.481,⋯,+-++-+-+-+--+-++-+-++++++-++++++-+-++-+-++++-++-+-++--+++-+++-++-++++++++---,13.9,75,0.1596,53386,77258,58749.13,chr1:73271206:C:T,-0.06209883,-7.220795
73276935,73274305,0,chr1:73276935:T:G,-0.0004460208,1,rs11210191,T,G,0.483,⋯,-+--+-+-+-++-+--+-+------+------+-+--+-+----+--+-+--++---+------+--------+++,14.8,75,0.144,53386,77258,58749.13,chr1:73276935:T:G,-0.06230103,-7.244305
73278190,73274305,0,chr1:73278190:A:G,0.000490016,1,rs11210193,A,G,0.523,⋯,--++-+-+-+--+--+-+-++++++-+-++++---++-+-++++-++-+-++--+++-+++++--++++++++---,15.6,75,0.1304,53386,77258,58749.13,chr1:73278190:A:G,0.06240187,7.256031
73281554,73274305,0,chr1:73281554:T:C,-0.000422409,1,rs7513593,T,C,0.483,⋯,-+--+-+-+-++-+--+-+------+------+-+--+-+----+--+-+--++---+------+--------+++,15.0,75,0.1408,53386,77258,58749.13,chr1:73281554:T:C,-0.06259907,-7.278962
73283600,73274305,0,chr1:73283600:C:T,0.0003856033,1,rs11210195,C,T,0.524,⋯,++--+-+-+-++-++-+-+------+-+----+++--+-+----+--+-+--++---+-----++--------+++,16.3,75,0.1191,53386,77258,58749.13,chr1:73283600:C:T,0.06270246,7.290984


In [42]:
merged_this_methylation_position$ss_P

In [43]:
mwas <- function(z, w, G){   
  if(length(w) > 1){
    #recover()
    # z-scores for effect of SNPs on external phenotype
    #. are weighted according to weights for effect of SNPs on methylation
    z <- z %*% w
    print(paste0("Weighted z-score is: ", z))
    # compute correlation matrix of SNP matrix, which captures LD structure
    print("We are looking at head of dosage data in genotype matrix G:")
    print(head(G))
    z.cor <- cor(G)
    print(paste0("z.cor is ", z.cor))
    # add small value to diagonal to avoid singular matrix
    #  which may otherwise happen if two SNPs in perfect LD
    z.cor <- z.cor + diag(dim(z.cor)[1])*0.1 
    # variance of correlated variables is weighted sum 
    # multiplying w by corr matrix once gives a vector representing
    #. the variance of each individual SNP and the extent to which they are
    #. influenced by other SNPs. Multiplying again by w sums up pairwise contributions
    #. and reflects total variance of weighted sum.
    #. the first w is automatically transposed by R so we don't have to write t(w)
    se <- sqrt(w %*%  z.cor %*%  w)
    print(paste0("Standard error is: ", se))
    z <- z/se
    p <- pnorm(abs(z), lower.tail=F)*2
    return(c(z=z, p=p, n=length(w)))
  } else {
    p <- pnorm(abs(z), lower.tail=F)*2
    return(c(z=z, p=p, n=1))
  }
}

In [44]:
mwas(z = merged_this_methylation_position$z,
     w = merged_this_methylation_position$contributing_snp_weight_mwas_stage1_prediction,
     G = G)

[1] "Weighted z-score is: 0.0226327701040369"
[1] "We are looking at head of dosage data in genotype matrix G:"
        rs6672818 rs61765637 rs12759031 rs11210191 rs11210193 rs7513593
HG00096         1          1          1          1          1         1
HG00097         0          0          0          0          0         0
HG00099         2          2          2          2          2         2
HG00101         1          1          1          1          1         1
HG00102         1          1          1          1          1         1
HG00103         2          2          2          2          2         2
        rs11210195
HG00096          1
HG00097          0
HG00099          2
HG00101          1
HG00102          1
HG00103          2
 [1] "z.cor is 1"                 "z.cor is 0.993873271168855"
 [3] "z.cor is 1"                 "z.cor is 1"                
 [5] "z.cor is 0.993873271168855" "z.cor is 0.997944928976738"
 [7] "z.cor is 0.993873271168855" "z.cor is 0.993873271168855"

In [45]:
mwas(z = merged_this_methylation_position$z,
     w = merged_this_methylation_position$contributing_snp_weight_mwas_stage1_prediction,
     G = G)

[1] "Weighted z-score is: 0.0226327701040369"
[1] "We are looking at head of dosage data in genotype matrix G:"
        rs6672818 rs61765637 rs12759031 rs11210191 rs11210193 rs7513593
HG00096         1          1          1          1          1         1
HG00097         0          0          0          0          0         0
HG00099         2          2          2          2          2         2
HG00101         1          1          1          1          1         1
HG00102         1          1          1          1          1         1
HG00103         2          2          2          2          2         2
        rs11210195
HG00096          1
HG00097          0
HG00099          2
HG00101          1
HG00102          1
HG00103          2
 [1] "z.cor is 1"                 "z.cor is 0.993873271168855"
 [3] "z.cor is 1"                 "z.cor is 1"                
 [5] "z.cor is 0.993873271168855" "z.cor is 0.997944928976738"
 [7] "z.cor is 0.993873271168855" "z.cor is 0.993873271168855"

##### An earlier attempt where we used ss instead of the merged df that contains it and other stuff too

In [46]:
ss_snps_for_this_cpg <- snp.gwas2[which(snp.gwas2$contributing_snp_pos %in% merged_this_methylation_position$contributing_snp_pos), ]

In [47]:
ss_snps_for_this_cpg <- ss_snps_for_this_cpg[order(ss_snps_for_this_cpg$contributing_snp_pos), ]

In [48]:
ss_snps_for_this_cpg

ss_CHR,ss_SNP,contributing_snp_pos,ss_A1,ss_A2,ss_FRQ_A_53386,ss_FRQ_U_77258,ss_INFO,ss_OR,ss_SE,ss_P,ss_ngt,ss_Direction,ss_HetISqt,ss_HetDf,ss_HetPVa,ss_Nca,ss_Nco,ss_Neff
<int>,<chr>,<int>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<int>,<chr>,<dbl>,<int>,<dbl>,<int>,<int>,<dbl>
1,rs6672818,73265462,C,T,0.483,0.496,0.991,0.9396,0.0086,5.189e-13,0,+-++-+-+-+-++-++-+-++++++-++++++-+-++-+-++++-++-+-++--+++-++++++-++++++++---,14.3,75,0.1531,53386,77258,58749.13
1,rs61765637,73269720,G,C,0.523,0.51,0.993,1.06439,0.0086,4.623e-13,0,-+--+-+-+-++-++-+-+------+-+----+-+--+-+----+--+-+--++---+-----++--------+++,14.9,75,0.1418,53386,77258,58749.13
1,rs12759031,73271206,C,T,0.481,0.494,0.992,0.93979,0.0086,5.844e-13,0,+-++-+-+-+--+-++-+-++++++-++++++-+-++-+-++++-++-+-++--+++-+++-++-++++++++---,13.9,75,0.1596,53386,77258,58749.13
1,rs11210191,73276935,T,G,0.483,0.496,0.996,0.9396,0.0086,4.661e-13,0,-+--+-+-+-++-+--+-+------+------+-+--+-+----+--+-+--++---+------+--------+++,14.8,75,0.144,53386,77258,58749.13
1,rs11210193,73278190,A,G,0.523,0.51,0.996,1.06439,0.0086,4.152e-13,17,--++-+-+-+--+--+-+-++++++-+-++++---++-+-++++-++-+-++--+++-+++++--++++++++---,15.6,75,0.1304,53386,77258,58749.13
1,rs7513593,73281554,T,C,0.483,0.496,0.996,0.93932,0.0086,3.448e-13,1,-+--+-+-+-++-+--+-+------+------+-+--+-+----+--+-+--++---+------+--------+++,15.0,75,0.1408,53386,77258,58749.13
1,rs11210195,73283600,C,T,0.524,0.51,0.996,1.06471,0.0086,3.261e-13,6,++--+-+-+-++-++-+-+------+-+----+++--+-+----+--+-+--++---+-----++--------+++,16.3,75,0.1191,53386,77258,58749.13


In [49]:
ss_snps_for_this_cpg$logOR <- log(ss_snps_for_this_cpg$ss_OR)
ss_snps_for_this_cpg$SElogOR <- ss_snps_for_this_cpg$ss_SE / ss_snps_for_this_cpg$ss_OR

In [50]:
# The (right?) way to compute it?
ss_snps_for_this_cpg$z <- ss_snps_for_this_cpg$logOR / ss_snps_for_this_cpg$SElogOR

In [51]:
ss_snps_for_this_cpg

ss_CHR,ss_SNP,contributing_snp_pos,ss_A1,ss_A2,ss_FRQ_A_53386,ss_FRQ_U_77258,ss_INFO,ss_OR,ss_SE,⋯,ss_Direction,ss_HetISqt,ss_HetDf,ss_HetPVa,ss_Nca,ss_Nco,ss_Neff,logOR,SElogOR,z
<int>,<chr>,<int>,<chr>,<chr>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<chr>,<dbl>,<int>,<dbl>,<int>,<int>,<dbl>,<dbl>,<dbl>,<dbl>
1,rs6672818,73265462,C,T,0.483,0.496,0.991,0.9396,0.0086,⋯,+-++-+-+-+-++-++-+-++++++-++++++-+-++-+-++++-++-+-++--+++-++++++-++++++++---,14.3,75,0.1531,53386,77258,58749.13,-0.06230103,0.009152831,-6.806749
1,rs61765637,73269720,G,C,0.523,0.51,0.993,1.06439,0.0086,⋯,-+--+-+-+-++-++-+-+------+-+----+-+--+-+----+--+-+--++---+-----++--------+++,14.9,75,0.1418,53386,77258,58749.13,0.06240187,0.008079745,7.723247
1,rs12759031,73271206,C,T,0.481,0.494,0.992,0.93979,0.0086,⋯,+-++-+-+-+--+-++-+-++++++-++++++-+-++-+-++++-++-+-++--+++-+++-++-++++++++---,13.9,75,0.1596,53386,77258,58749.13,-0.06209883,0.009150981,-6.78603
1,rs11210191,73276935,T,G,0.483,0.496,0.996,0.9396,0.0086,⋯,-+--+-+-+-++-+--+-+------+------+-+--+-+----+--+-+--++---+------+--------+++,14.8,75,0.144,53386,77258,58749.13,-0.06230103,0.009152831,-6.806749
1,rs11210193,73278190,A,G,0.523,0.51,0.996,1.06439,0.0086,⋯,--++-+-+-+--+--+-+-++++++-+-++++---++-+-++++-++-+-++--+++-+++++--++++++++---,15.6,75,0.1304,53386,77258,58749.13,0.06240187,0.008079745,7.723247
1,rs7513593,73281554,T,C,0.483,0.496,0.996,0.93932,0.0086,⋯,-+--+-+-+-++-+--+-+------+------+-+--+-+----+--+-+--++---+------+--------+++,15.0,75,0.1408,53386,77258,58749.13,-0.06259907,0.009155559,-6.837274
1,rs11210195,73283600,C,T,0.524,0.51,0.996,1.06471,0.0086,⋯,++--+-+-+-++-++-+-+------+-+----+++--+-+----+--+-+--++---+-----++--------+++,16.3,75,0.1191,53386,77258,58749.13,0.06270246,0.008077317,7.762783


In [52]:
dim(merged)

In [53]:
colnames(merged)

In [54]:
head(merged)

contributing_snp_pos,methylation_position,pval_mwas_stage2,contributing_snp_name,contributing_snp_weight_mwas_stage1_prediction,ss_CHR,ss_SNP,ss_A1,ss_A2,ss_FRQ_A_53386,⋯,ss_P,ss_ngt,ss_Direction,ss_HetISqt,ss_HetDf,ss_HetPVa,ss_Nca,ss_Nco,ss_Neff,contributing_snp_name_verified
<int>,<list>,<list>,<chr>,<dbl>,<int>,<chr>,<chr>,<chr>,<dbl>,⋯,<dbl>,<int>,<chr>,<dbl>,<int>,<dbl>,<int>,<int>,<dbl>,<chr>
73265462,73274305,0,chr1:73265462:C:T,-0.000396641,1,rs6672818,C,T,0.483,⋯,5.189e-13,0,+-++-+-+-+-++-++-+-++++++-++++++-+-++-+-++++-++-+-++--+++-++++++-++++++++---,14.3,75,0.1531,53386,77258,58749.13,chr1:73265462:C:T
73269720,73274305,0,chr1:73269720:G:C,0.0004947975,1,rs61765637,G,C,0.523,⋯,4.623e-13,0,-+--+-+-+-++-++-+-+------+-+----+-+--+-+----+--+-+--++---+-----++--------+++,14.9,75,0.1418,53386,77258,58749.13,chr1:73269720:G:C
73271206,73274305,0,chr1:73271206:C:T,-0.0004841998,1,rs12759031,C,T,0.481,⋯,5.844e-13,0,+-++-+-+-+--+-++-+-++++++-++++++-+-++-+-++++-++-+-++--+++-+++-++-++++++++---,13.9,75,0.1596,53386,77258,58749.13,chr1:73271206:C:T
73276935,73274305,0,chr1:73276935:T:G,-0.0004460208,1,rs11210191,T,G,0.483,⋯,4.661e-13,0,-+--+-+-+-++-+--+-+------+------+-+--+-+----+--+-+--++---+------+--------+++,14.8,75,0.144,53386,77258,58749.13,chr1:73276935:T:G
73278190,73274305,0,chr1:73278190:A:G,0.000490016,1,rs11210193,A,G,0.523,⋯,4.152e-13,17,--++-+-+-+--+--+-+-++++++-+-++++---++-+-++++-++-+-++--+++-+++++--++++++++---,15.6,75,0.1304,53386,77258,58749.13,chr1:73278190:A:G
73281554,73274305,0,chr1:73281554:T:C,-0.000422409,1,rs7513593,T,C,0.483,⋯,3.448e-13,1,-+--+-+-+-++-+--+-+------+------+-+--+-+----+--+-+--++---+------+--------+++,15.0,75,0.1408,53386,77258,58749.13,chr1:73281554:T:C


In [None]:
mwas(z = ss_snps_for_this_cpg$z,
     w = ss_snps_for_this_cpg$)

In [None]:
mwas <- function(z, w, G){   
  if(length(w) > 1){
    #recover()
    # z-scores for effect of SNPs on external phenotype
    #. are weighted according to weights for effect of SNPs on methylation
    z <- z %*% w
    # compute correlation matrix of SNP matrix, which captures LD structure
    z.cor <- cor(G)
    # add small value to diagonal to avoid singular matrix
    #  which may otherwise happen if two SNPs in perfect LD
    z.cor <- z.cor + diag(dim(z.cor)[1])*0.1 
    # variance of correlated variables is weighted sum 
    # multiplying w by corr matrix once gives a vector representing
    #. the variance of each individual SNP and the extent to which they are
    #. influenced by other SNPs. Multiplying again by w sums up pairwise contributions
    #. and reflects total variance of weighted sum.
    #. the first w is automatically transposed by R so we don't have to write t(w)
    se <- sqrt(w %*%  z.cor %*%  w)
    z <- z/se
    p <- pnorm(abs(z), lower.tail=F)*2
    return(c(z=z, p=p, n=length(w)))
  } else {
    p <- pnorm(abs(z), lower.tail=F)*2
    return(c(z=z, p=p, n=1))
  }
}

In [None]:
mwas()

In [57]:
getwd()

In [61]:
# List files recursively in the current working directory
files <- list.files(path = "/expanse/lustre/projects/jhu152/naglemi/mwas/CpGWAS/", pattern = "", full.names = TRUE, recursive = TRUE)

# Filter files containing both 'chr1-chr1' and '_results.rds'
files_to_rename <- grep("chr1-chr1.*_results\\.rds", files, value = TRUE)

In [62]:
# Rename files using a loop
for (file in files_to_rename) {
  new_name <- gsub("_results\\.rds", "_oldresults.rds", file)
  #print(file)
  #print(new_name)
  #print("next")
  file.rename(file, new_name)
}