# Compare MWAS methods for sanity tests and troubleshooting

In this version, we use the full new SNP set with a selected window

# Pick the regions we will test

In [1]:
library(data.table)

# Reference table of hits, parsed by fread() from an inline
# whitespace-delimited table with columns Chr, pos, old_z and old_p.
# NOTE(review): presumably the top hits of an earlier MWAS run; the object
# is not referenced again in the visible part of this notebook.
prev_hits <- fread("
Chr     pos        old_z       old_p
11   38247902        -27.1535308285104       2.30024742330298e-162
2    47933357        18.3327793004811        4.53147699327216e-75
7    1987910         10.1255085321387        4.25759742738181e-24
7    1987896         10.1255085321387        4.25759742738189e-24
7    1987797         10.0241523897721        1.19379483108027e-23
7    1987778         10.0105851568856        1.36940148731912e-23
12   2194742         -10.0072634920486       1.41615523554955e-23
")


In [2]:
#df <- fread("09.5-OUT_matched_SNP_meth_cov_chunked_EXPANSE_a2.csv")

In [3]:
df <- fread("09-OUT_matched_SNP_meth_cov_a2.csv")

## Try original code

In [4]:
###### model: learn elastic net model on training data 
######---------Input: trainX, trainY
######---------Return: selected features and coefficients

# original
# Fit an elastic-net regression and return its non-zero coefficients.
#
# NOTE: a second `elastic.net` defined further down in this cell overrides
# this one when the cell is run; this version is kept for reference.
#
# Args:
#   trainX: predictor matrix, one row per observation.
#   trainY: response vector with one entry per row of `trainX`.
#
# Returns:
#   A data.frame with columns `features` (coefficient names, including
#   "(Intercept)" when it is non-zero) and `coefs` (coefficient values).
#
# Raises:
#   An error when `nrow(trainX)` and `length(trainY)` differ.
elastic.net <- function(trainX, trainY) {
    if (nrow(trainX) != length(trainY)) {
        stop("Number of observations is differerent")
    }

    # Candidate mixing parameters; only alpha = 0.5 is evaluated.
    a <- 0.5
    search <- foreach(ai = a, .combine = rbind) %dopar% {
        # Argument names are spelled out in full (`nfolds`, `parallel`)
        # instead of relying on partial matching of `nfold` / `paralle`.
        cv.fit <- cv.glmnet(
            trainX,
            trainY,
            nfolds = 5,
            type.measure = "mse",
            parallel = TRUE,
            alpha = ai
        )
        data.frame(
            cvm = min(cv.fit$cvm),
            lambda = cv.fit$lambda.min,
            alpha = ai
        )
    }
    # which.min() always yields exactly one row, even if several
    # candidates tie on the cross-validated error.
    cv.opt <- search[which.min(search$cvm), ]

    # Refit on all observations with the selected alpha and lambda.
    yfit <- glmnet(
        trainX,
        trainY,
        lambda = cv.opt$lambda,
        alpha = cv.opt$alpha
    )
    idf <- coef(yfit)
    idx <- which(idf != 0)
    data.frame(
        features = rownames(idf)[idx],
        coefs = idf[idx]
    )
}

# modified to use lambda 1se and appropriate cvm
# Fit an elastic-net regression at the cross-validated `lambda.1se` and
# return its non-zero coefficients. Diagnostic information is printed
# along the way (input sizes and the non-zero coefficients of both the
# cross-validated fit and the final fit).
#
# Args:
#   trainX: predictor matrix, one row per observation.
#   trainY: response vector with one entry per row of `trainX`.
#
# Returns:
#   A data.frame with columns `features` (coefficient names, including
#   "(Intercept)" when it is non-zero) and `coefs` (coefficient values).
#
# Raises:
#   An error when `nrow(trainX)` and `length(trainY)` differ.
#
# Side effects:
#   Calls set.seed(42), so the global RNG state is reset on every call.
elastic.net <- function(trainX, trainY) {
    if (nrow(trainX) != length(trainY)) {
        stop("Number of observations is differerent")
    }

    # Candidate mixing parameters; only alpha = 0.5 is evaluated.
    a <- 0.5
    search <- foreach(ai = a, .combine = rbind) %dopar% {
        # Fixed seed so the cross-validation fold assignment is reproducible.
        set.seed(42)
        # Argument names are spelled out in full (`nfolds`, `parallel`)
        # instead of relying on partial matching of `nfold` / `paralle`.
        cv.fit <- cv.glmnet(
            trainX,
            trainY,
            nfolds = 5,
            type.measure = "mse",
            parallel = TRUE,
            alpha = ai
        )
        # Collapse the dimensions into one string; pasting the raw dim()
        # vector would recycle into two separate messages.
        print(paste0("Dim of trainX: ", paste(dim(trainX), collapse = " x ")))
        print(paste0("Len of trainY: ", length(trainY)))

        # coef() on a cv.glmnet fit is taken at its default `s`
        # (lambda.1se per the glmnet documentation).
        coef_matrix <- as.matrix(coef(cv.fit))
        non_zero_coefs <- coef_matrix[coef_matrix != 0, , drop = FALSE]
        print("Coefficients when fitting: ")
        print(non_zero_coefs)

        # Report the CV error that belongs to lambda.1se, not the minimum.
        data.frame(
            cvm = cv.fit$cvm[cv.fit$lambda == cv.fit$lambda.1se],
            lambda = cv.fit$lambda.1se,
            alpha = ai
        )
    }
    # which.min() always yields exactly one row, even if several
    # candidates tie on the cross-validated error.
    cv.opt <- search[which.min(search$cvm), ]

    # Refit on all observations with the selected alpha and lambda.
    set.seed(42)
    yfit <- glmnet(
        trainX,
        trainY,
        lambda = cv.opt$lambda,
        alpha = cv.opt$alpha
    )

    idf <- coef(yfit)
    coef_matrix <- as.matrix(idf)
    non_zero_coefs <- coef_matrix[coef_matrix != 0, , drop = FALSE]
    print("Coefficients when optimal: ")
    print(non_zero_coefs)

    idx <- which(idf != 0)
    data.frame(
        features = rownames(idf)[idx],
        coefs = idf[idx]
    )
}

# Combine per-SNP GWAS z-scores into one weighted association statistic.
#
# The weighted sum `gwas %*% weight` is divided by
# sqrt(weight' R weight), where R is the correlation matrix of the columns
# of `geno`, and a two-sided normal p-value is computed from the result.
#
# Args:
#   gwas:   numeric vector of GWAS z-scores, one per SNP.
#   weight: numeric vector of SNP weights, same length and order as `gwas`.
#   geno:   genotype matrix with one column per SNP (same order as
#           `weight`); a plain vector is treated as a single SNP column.
#
# Returns:
#   Numeric vector c(z, p).
#
# Raises:
#   An error when the number of SNPs implied by `gwas`, `weight` and
#   `geno` disagree.
MWAS <- function(gwas, weight, geno) {
    geno <- as.matrix(geno)
    if (length(gwas) != length(weight) || ncol(geno) != length(weight)) {
        stop(
            "gwas, weight and the columns of geno must describe the same SNPs",
            call. = FALSE
        )
    }

    z <- gwas %*% weight
    z.cor <- cor(geno)
    se <- sqrt(weight %*% z.cor %*% weight)
    z <- z / se
    # FALSE is spelled out: `F` is an ordinary variable that can be reassigned.
    p <- pnorm(abs(z), lower.tail = FALSE) * 2
    c(z, p)
}

In [5]:
df <- df[which(df$Chr == 7), ]

In [6]:
i <- 2

In [7]:
library("glmnet")
library("e1071")
library("doParallel")

set.seed(2018)
wind <- c(5000,10000)
# output directory
#outd <- "/dcl02/lieber/shan/shizhong/finemapping/GWAS/tags/scz3/mwas/chr22/1/"
outd <- "20-OUT_original_mwas_sanity_test/"

Loading required package: Matrix

Loaded glmnet 4.1-8

Loading required package: foreach

Loading required package: iterators

Loading required package: parallel



## Replace all old objects with new objects in same format (except covariates)

In [8]:
load("p1.rda", verbose = TRUE)

Loading objects:
  snp.gwas2
  snp.1kg.eur2
  map.1kg.eur2
  snp2
  map2
  p
  BSsample


### SNPs

In [9]:
p[1:10, 1:10]

0,1,2,3,4,5,6,7,8,9,10
1980077,0.9623693,0.9533113,0.9488137,0.9382598,0.9446401,0.9215182,0.9263652,0.9588217,0.9330597,0.9588301
1980101,0.9623933,0.9536637,0.9487929,0.9372716,0.9453945,0.9216584,0.9256115,0.9593576,0.9326077,0.9595451
1980129,0.962326,0.9543107,0.9485887,0.9362757,0.946074,0.9218622,0.9248481,0.9598699,0.9323198,0.9601973
1980136,0.9622831,0.9545096,0.9484938,0.9360728,0.9462013,0.9219193,0.9247144,0.9599708,0.9323138,0.9603281
1980145,0.9622125,0.9547853,0.9483458,0.9358404,0.9463407,0.9219962,0.9245785,0.9600847,0.9323456,0.9604767
1980179,0.9617977,0.9560054,0.9475364,0.9352418,0.946639,0.9223291,0.924415,0.9603628,0.9328418,0.9608582
1980183,0.9617343,0.9561653,0.9474163,0.9351995,0.946652,0.9223756,0.9244306,0.9603806,0.9329368,0.9608897
1980193,0.9615631,0.9565781,0.9470942,0.9351188,0.9466655,0.9224991,0.9244996,0.9604121,0.9332054,0.960958
1980205,0.9613345,0.9570958,0.9466676,0.9350679,0.9466476,0.9226612,0.9246379,0.960426,0.9335838,0.9610206
1980239,0.9605591,0.9586691,0.9452337,0.9351803,0.9464118,0.9231993,0.9253325,0.9603334,0.9349537,0.9610909


### Methylation data

In [10]:
suppressWarnings(library(bsseq))

Loading required package: BiocGenerics


Attaching package: ‘BiocGenerics’


The following objects are masked from ‘package:stats’:

    IQR, mad, sd, var, xtabs


The following objects are masked from ‘package:base’:

    anyDuplicated, aperm, append, as.data.frame, basename, cbind,
    colnames, dirname, do.call, duplicated, eval, evalq, Filter, Find,
    get, grep, grepl, intersect, is.unsorted, lapply, Map, mapply,
    match, mget, order, paste, pmax, pmax.int, pmin, pmin.int,
    Position, rank, rbind, Reduce, rownames, sapply, setdiff, sort,
    table, tapply, union, unique, unsplit, which.max, which.min


Loading required package: GenomicRanges

Loading required package: stats4

Loading required package: S4Vectors


Attaching package: ‘S4Vectors’


The following objects are masked from ‘package:Matrix’:

    expand, unname


The following objects are masked from ‘package:data.table’:

    first, second


The following object is masked from ‘package:utils’:

    findMatches


The

In [11]:
# Read the methylation .rda listed in row `i` of `df`; the cell expects it
# to provide `BSobj2`, which is used immediately below.
# Former hard-coded input: load("./rda/caudate_mwas_data_chr22.rda")
load(df$methylation_data[i])

# Indices of the sites whose start lies within 500 bp of position 1987910.
sites_to_test <- which(abs(start(BSobj2) - 1987910) <= 500)

# Methylation matrix with rows labelled by site start position, then
# restricted to the sites selected above.
p <- getMeth(BSobj2)
rownames(p) <- start(BSobj2)
p <- p[sites_to_test, ]

# Positions of the candidate CpGs, recovered from the row labels.
cg <- as.numeric(rownames(p))

# regress out covariates
#load("covs_for_meqtl.rda")

### covariates

In [12]:
# Load the covariate matrix and align it to the samples of BSobj2.
load("covs_for_meqtl.rda")

# Strip the zero padding from sample IDs ("Br0836" -> "Br836") on BOTH
# sides of the match. Samples are looked up in colnames(covs) below, so the
# rename has to be applied to the column names; renaming the row names
# leaves zero-padded samples unmatched and their covariates all NA.
BSobj2$brnum <- gsub("Br0", "Br", BSobj2$brnum)
colnames(covs) <- gsub("Br0", "Br", colnames(covs))

mat <- match(BSobj2$brnum, colnames(covs))
if (anyNA(mat)) {
    warning(
        sum(is.na(mat)), " sample(s) in BSobj2 have no covariate column; ",
        "their covariate rows will be NA",
        call. = FALSE
    )
}

# Reorder to BSobj2 sample order and transpose to samples x covariates.
covs <- t(covs[, mat])

In [13]:
head(covs)

Unnamed: 0,mds$snpPC1,mds$snpPC2,mds$snpPC3,mds$snpPC4,mds$snpPC5,PC1,PC2,PC3,PC4,PC5,⋯,PC19,PC20,PC21,PC22,PC23,PC24,PC25,PC26,PC27,PC28
Br1122,0.0533622,-0.00493535,0.00324252,-0.00248045,-0.00156466,-28.662568,14.197826,-1.374641,31.370322,10.599321,⋯,-6.188123,5.066481,-7.593742,14.762618,-0.999303,7.784856,-6.750361,0.6819553,-4.067829,6.0399797
Br2285,-0.0615293,-0.00774836,0.00263268,-0.000332221,-0.00443067,7.868489,6.519323,25.379744,3.596101,12.190329,⋯,3.897745,-11.184795,4.910411,-5.737297,13.716783,-11.824779,6.780192,3.2390141,4.800311,-2.2493247
Br1764,-0.0601464,-0.00559244,0.000477782,-0.00115565,-0.00388136,-7.209888,17.143397,18.170361,-2.863536,1.119479,⋯,-10.379832,6.763648,-2.738643,-9.204381,5.927248,11.778903,16.207966,-8.3733597,2.676408,-4.9324159
Br1464,0.0555706,-0.00321112,0.00312379,0.000378073,-0.00153485,-15.887979,-27.271631,-11.113779,-15.752138,6.550067,⋯,7.977551,13.249212,-7.630136,-9.933193,1.287399,-3.231867,-28.76307,-6.1366292,23.160872,6.3218393
Br5062,-0.0625893,0.0236126,-0.00073594,-0.00195798,-0.00334107,21.936325,11.6501,8.137399,-13.960197,-10.980829,⋯,5.584715,-5.241646,-6.839111,2.968352,-13.821436,9.723883,5.313466,-3.6308581,1.554839,-5.660891
Br1446,-0.0606799,-0.00542077,-0.000522544,-0.00383369,0.0020026,41.136001,7.668123,8.333769,-6.673114,1.862816,⋯,-11.593362,-9.878328,5.129886,-1.385505,8.704721,6.65587,3.770619,3.5519316,1.804099,-0.4805721


In [14]:
#covs <- fread(df$cov_file[i])
#covs <- t(covs)
#colnames(covs) <- covs[1, ]
#covs <- covs[2:nrow(covs), ]
# transpose so we have same orientation as original code

### Regress methylation data over covariates

In [15]:
#mat <- match(BSobj2$brnum,colnames(covs)) 
#covs <- t(covs[,mat])
# Residual matrix, sites x samples, filled with NA until the regression
# loop runs. Dimnames are attached here because later cells filter and
# reorder samples through colnames(p.residual); without column names those
# steps select zero columns.
# NOTE(review): assumes the columns of `p` are in the same order as
# BSobj2$brnum (p comes from getMeth(BSobj2)) - confirm.
p.residual <- matrix(
    NA_real_,
    nrow = nrow(p),
    ncol = ncol(p),
    dimnames = list(rownames(p), BSobj2$brnum)
)

In [16]:
#rownames(covs)[is.na(covs[, 'genoPC1'])] <- BSobj2$brnum[is.na(covs[, 'genoPC1'])]

In [17]:
#colnames(p.residual) <- BSobj2$brnum

In [18]:
# covs <- as.data.frame(covs)
# # Convert all columns except Dx and Sex from character to numeric
# cols_to_convert <- setdiff(names(covs), c("Dx", "Sex"))

# for (col in cols_to_convert) {
#   covs[[col]] <- as.numeric(covs[[col]])
# }

# # Print the modified data frame to check the conversion
# #print(dat)


In [19]:
# Regress each methylation site on the covariates and keep the residuals.
# The loop variable is `site` rather than `i`, so the global `i` (the row
# of `df` used to locate the methylation file) is not overwritten, and
# seq_len() makes the loop a no-op when `p` has zero rows.
for (site in seq_len(nrow(p))) {
    dat <- as.data.frame(cbind(y = p[site, ], covs))

    # Samples with any missing value (e.g. no covariate data) are left out
    # of the fit; their residuals stay NA.
    valid_rows <- complete.cases(dat)

    if (any(valid_rows)) {
        model.res <- lm(y ~ ., data = dat[valid_rows, ])

        # Store residuals in the positions of the samples that were fitted.
        p.residual[site, valid_rows] <- resid(model.res)
    }
}


# for(i in 1:dim(p)[1]){ # for each methylation site
#         dat <- as.data.frame(cbind(p[i,],covs))
#         colnames(dat) <- c("y",paste0("x",1:ncol(covs)))
#         model.res <- lm(reformulate(paste0("x",1:ncol(covs)), "y"),dat)
#         p.residual[i,] = resid(model.res) 
# }

In [20]:
snp.gwas2 <- NULL

In [21]:
# load("p1.rda")

In [22]:
# min(snp.gwas2$pos_hg38)
# max(snp.gwas2$pos_hg38)

In [23]:
# load("p1.rda")
# pos_we_got <- snp.gwas2$pos_hg38
# saveRDS(pos_we_got, "20-intermediate_positions_in_old_set.csv")

In [24]:
pos_we_got <- readRDS("20-intermediate_positions_in_old_set.csv")

In [25]:
# head(snp.gwas2)

### summary stats

In [26]:
library(data.table)
library(CpGWAS)

In [27]:
ss_path <- "/home/naglemi/mwas/gwas/gwas_stat_scz"

In [1]:
snp.gwas2 <- fread(ss_path, skip = 1, header = FALSE)
colnames(snp.gwas2) <- strsplit(readLines(ss_path, n = 1), "\t")[[1]]

ERROR: Error in fread(ss_path, skip = 1, header = FALSE): could not find function "fread"


In [29]:
snp.gwas2$z <- log(snp.gwas2$OR)/snp.gwas2$SE

In [30]:
snp.gwas2 <- snp.gwas2[, c(2, 1, 3, 3, 8, 4, 5, 20, 11)]

In [31]:
head(snp.gwas2, n = 1)

SNP,CHR,BP,BP,INFO,A1,A2,z,P
<chr>,<int>,<int>,<int>.1,<dbl>,<chr>,<chr>,<dbl>,<dbl>
rs62513865,8,100579985,100579985,0.963,C,T,0.7016221,0.4847


In [32]:
colnames(snp.gwas2)[1:5] <- c("snp", "chr", "pos_hg38", "pos_hg38", "info")

In [33]:
snp.gwas2 <- snp.gwas2[which(snp.gwas2$chr == 7 & snp.gwas2$pos_hg38 >= 1963098 & snp.gwas2$pos_hg38 <= 2009071), ]

In [34]:
dim(snp.gwas2)

In [35]:
snp.gwas2 <- snp.gwas2[order(snp.gwas2$pos_hg38), ]

In [36]:
head(snp.gwas2)

snp,chr,pos_hg38,pos_hg38,info,A1,A2,z,P
<chr>,<int>,<int>,<int>.1,<dbl>,<chr>,<chr>,<dbl>,<dbl>
rs11773627,7,1963098,1963098,0.987,T,C,-5.211806,1.955e-07
rs6972374,7,1963408,1963408,0.982,C,T,-5.28832,1.22e-07
rs7795303,7,1963697,1963697,0.995,T,C,-2.367885,0.01805
rs6946691,7,1964758,1964758,0.995,A,T,-2.339622,0.01912
rs12666575,7,1964786,1964786,0.993,C,T,8.855337,6.441e-19
rs61467855,7,1964869,1964869,0.994,G,A,2.49083,0.01263


In [37]:
snp.gwas2 <- snp.gwas2[which(snp.gwas2$pos_hg38 %in% pos_we_got), ]

In [38]:
dim(snp.gwas2)

In [39]:
# build prediction models
idx.ea <- BSobj2$race == "CAUC"

In [40]:
head(snp2)

Unnamed: 0_level_0,Br0836,Br0845,Br0848,Br0863,Br0914,Br0948,Br0949,Br0963,Br0983,Br0991,⋯,Br5373,Br5398,Br5422,Br5426,Br5460,Br5467,Br5475,Br5488,Br5584,Br5590
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr7:1963098:T:C,2.0,2,2.0,2,2.0,1.999,2.0,1.974,1.0,2,⋯,1.954,2,2,2,2,2,2,1,1,1
chr7:1963408:C:T,0.001,2,1.991,0,1.969,0.006,0.004,0.14,2.0,0,⋯,2.0,0,1,2,2,1,1,2,2,2
chr7:1964786:C:T,2.0,2,1.0,2,2.0,2.0,2.0,2.0,1.0,2,⋯,0.001,2,2,0,2,2,2,1,1,2
chr7:1966112:T:C,2.0,2,1.001,2,2.0,2.0,2.0,2.0,1.002,2,⋯,0.015,2,2,0,2,2,2,1,1,2
chr7:1973362:G:A,2.0,2,1.095,2,2.0,2.0,0.004,2.0,1.002,2,⋯,0.015,2,2,0,2,1,2,1,1,2
chr7:1975412:T:G,2.0,2,2.0,2,2.0,2.0,2.0,2.0,1.0,2,⋯,2.0,2,2,2,2,2,2,1,1,1


In [41]:
snp2_sorted <- snp2[, order(names(snp2))]

In [42]:
colnames(snp2) <- gsub("Br0", "Br", colnames(snp2))

In [43]:
head(snp2)

Unnamed: 0_level_0,Br836,Br845,Br848,Br863,Br914,Br948,Br949,Br963,Br983,Br991,⋯,Br5373,Br5398,Br5422,Br5426,Br5460,Br5467,Br5475,Br5488,Br5584,Br5590
Unnamed: 0_level_1,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,⋯,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>,<dbl>
chr7:1963098:T:C,2.0,2,2.0,2,2.0,1.999,2.0,1.974,1.0,2,⋯,1.954,2,2,2,2,2,2,1,1,1
chr7:1963408:C:T,0.001,2,1.991,0,1.969,0.006,0.004,0.14,2.0,0,⋯,2.0,0,1,2,2,1,1,2,2,2
chr7:1964786:C:T,2.0,2,1.0,2,2.0,2.0,2.0,2.0,1.0,2,⋯,0.001,2,2,0,2,2,2,1,1,2
chr7:1966112:T:C,2.0,2,1.001,2,2.0,2.0,2.0,2.0,1.002,2,⋯,0.015,2,2,0,2,2,2,1,1,2
chr7:1973362:G:A,2.0,2,1.095,2,2.0,2.0,0.004,2.0,1.002,2,⋯,0.015,2,2,0,2,1,2,1,1,2
chr7:1975412:T:G,2.0,2,2.0,2,2.0,2.0,2.0,2.0,1.0,2,⋯,2.0,2,2,2,2,2,2,1,1,1


In [44]:
dim(snp2)

## I think a problem happens here

In [45]:
snp2 <- snp2[, colnames(snp2) %in% colnames(p.residual)]

In [None]:
dim(snp2)

In [None]:
head(snp2)

In [None]:
snp2_positions <- stringr::str_split_fixed(rownames(snp2), ":", 3)[, 2]

In [1]:
library(CpGWAS)

In [None]:
snp3 <- readRDS("20-IN_check_SNP_window_pos1987413_win50000.rds")

In [None]:
dim(snp3)

In [None]:
length(rownames(snp3))

In [None]:
map3 <- data.frame(POS = stringr::str_split_fixed(colnames(snp3), ":", 3)[, 2])

In [None]:
snp3 <- t(snp3)

In [None]:
snp3 <- snp3[which(map3$POS %in% pos_we_got), ]

In [None]:
map3 <- data.frame(POS = stringr::str_split_fixed(rownames(snp3), ":", 3)[, 2])

In [None]:
dim(map3)

Once we filter down to the same SNPs, are the data structures the same? We must reorder the old one too, to make sure.

In [None]:
colnames(snp2) <- gsub("Br0", "Br", colnames(snp2))

In [None]:
head(snp2)

In [None]:
snp2 <- snp2[, order(colnames(snp2))]

In [None]:
head(snp3)

In [None]:
head(snp2)

We also must flip the new one, round to 3 decimals, and convert to df

In [None]:
# Flip the allele coding (dosage d becomes 2 - d), round to three decimals,
# and only then convert the result to a data frame.
snp3 <- round(2 - snp3, 3)
snp3 <- as.data.frame(snp3)

In [None]:
head(snp3)

In [None]:
# compare_data_frames <- function(df1, df2) {
#   # Initialize lists to store differences
#   differences <- list()
  
#   # Check if column names are identical
#   if (!identical(colnames(df1), colnames(df2))) {
#     differing_cols <- setdiff(union(colnames(df1), colnames(df2)), intersect(colnames(df1), colnames(df2)))
#     differences$column_names <- differing_cols
#   }
  
#   # Check if row names are identical
#   if (!identical(rownames(df1), rownames(df2))) {
#     differing_rows <- setdiff(union(rownames(df1), rownames(df2)), intersect(rownames(df1), rownames(df2)))
#     differences$row_names <- differing_rows
#   }
  
#   # Check for differing values
#   for (i in seq_len(nrow(df1))) {
#     for (j in seq_len(ncol(df1))) {
#       if (df1[i, j] != df2[i, j]) {
#         differences$values <- rbind(
#           differences$values, 
#           data.frame(
#             row = i, 
#             column = j, 
#             df1_value = df1[i, j], 
#             df2_value = df2[i, j]
#           )
#         )
#       }
#     }
#   }
  
#   return(differences)
# }

# # Example usage:
# # Assuming snp2 and snp3 are your data frames
# result <- compare_data_frames(snp2, snp3)

# # Print results
# if (!is.null(result$column_names)) {
#   cat("Differing column names:\n")
#   print(result$column_names)
# }

# if (!is.null(result$row_names)) {
#   cat("Differing row names:\n")
#   print(result$row_names)
# }

# if (!is.null(result$values)) {
#   cat("Differing values:\n")
#   print(result$values)
# } else {
#   cat("No differing values found.\n")
# }


Not quite the same but very close. Let's proceed with snp2 (old version) for now.

In [None]:
identical(snp2, snp3)

In [None]:
snp3 <- snp2

In [None]:
dim(snp3)

In [None]:
head(snp3)

In [None]:
dim(map3)

### Set window size and any other parameters

In [None]:
wind <- 10000

Is 1se vs min for lambda the problem?

## Run for all

In [None]:
head(map3)

In [None]:
dim(map3)

In [None]:
dim(snp3)

In [None]:
p.residual <- p.residual[, order(colnames(p.residual))]

In [None]:
# Build per-CpG elastic-net prediction models from the SNPs inside a window
# around each CpG: once with all samples (models.all) and once with the
# samples flagged by idx.ea (models.ea).
#
# NOTE(review): models.all / models.ea are rebuilt for every window size,
# so only the models of the last entry of `wind` survive the loop.
# NOTE(review): idx.ea is applied to the rows of trainX as-is; if the sample
# columns of snp3 / p.residual were reordered after idx.ea was computed, the
# EA subset may not line up with the samples - confirm.

# map3 must describe snp3 row by row, otherwise the window mask below would
# silently select the wrong SNPs.
stopifnot(nrow(map3) == nrow(snp3))

# Positions are parsed from "chr:pos:..." strings and are therefore
# character; convert once so the window test is numeric, not lexicographic.
snp_pos <- as.numeric(map3$POS)

# Run elastic.net(), reporting (instead of silently discarding) failures.
fit_or_null <- function(x, y) {
    tryCatch(
        elastic.net(x, y),
        error = function(e) {
            message("elastic.net failed: ", conditionMessage(e))
            NULL
        }
    )
}

for (k in seq_along(wind)) {
    all_fits <- list()
    ea_fits <- list()
    for (i in seq_along(cg)) {
        cat(i, "\n")
        print(paste0("This cg is: ", cg[i]))
        range1 <- max(cg[i] - wind[k], 0)
        range2 <- cg[i] + wind[k]
        idx <- snp_pos > range1 & snp_pos < range2
        # go to next cg if at most one SNP lies within the window
        if (sum(idx) <= 1) {
            next
        }
        geno <- snp3[idx, ]
        rownames(geno) <- map3$POS[idx]
        trainX <- t(geno)
        trainY <- p.residual[i, ]

        # All samples. The model is fitted once, inside the error handler;
        # a failed or empty fit skips this CpG entirely (EA model included).
        fit <- fit_or_null(trainX, trainY)
        if (is.null(fit) || nrow(fit) == 0) {
            next
        }
        fit$cg <- cg[i]
        all_fits[[length(all_fits) + 1L]] <- fit

        # EA only
        trainX <- trainX[idx.ea, ]
        # need more than one SNP that still varies within the subset
        if (sum(apply(trainX, 2, var) != 0) <= 1) {
            next
        }
        trainY <- trainY[idx.ea]
        fit <- fit_or_null(trainX, trainY)
        if (is.null(fit) || nrow(fit) == 0) {
            next
        }
        fit$cg <- cg[i]
        ea_fits[[length(ea_fits) + 1L]] <- fit
    }
    # Bind once per window instead of growing a data frame inside the loop;
    # both are NULL when no model was fitted.
    models.all <- do.call(rbind, all_fits)
    models.ea <- do.call(rbind, ea_fits)
}

In [None]:
# Keep SNP weights only: discard the "(Intercept)" rows of both model tables.
models.ea <- models.ea[!(models.ea$features == "(Intercept)"), ]
models.all <- models.all[!(models.all$features == "(Intercept)"), ]

In [None]:
if(!dir.exists(outd)) dir.create(outd)

In [None]:
head(models.all)

In [None]:
dim(models.all)

In [None]:
# Run MWAS() for every CpG that has a prediction model.
#
# Args:
#   models: data.frame of model weights with the SNP position in column 1,
#           the weight in column 2 and the CpG position in column `cg`.
#
# Returns:
#   A matrix with one row per CpG (row names are the CpG positions) and
#   columns "z" and "p"; zero rows when `models` holds no CpG.
#
# Reads snp.gwas2, snp.1kg.eur2 and map.1kg.eur2 from the global environment.
run_mwas <- function(models) {
    cg_ids <- unique(models$cg)
    res <- matrix(
        0,
        nrow = length(cg_ids),
        ncol = 2,
        dimnames = list(as.character(cg_ids), c("z", "p"))
    )
    for (j in seq_along(cg_ids)) {
        rows <- models$cg == cg_ids[j]
        pos <- models[rows, 1]
        weight <- models[rows, 2]

        # Look the z-scores up in model order with match(); a membership
        # filter returns them in GWAS-table order, which only lines up with
        # the weights when both happen to be sorted identically.
        gwas_idx <- match(pos, snp.gwas2$pos_hg38)
        if (anyNA(gwas_idx)) {
            stop(
                "model SNP(s) missing from GWAS summary statistics for cg ",
                cg_ids[j],
                call. = FALSE
            )
        }
        gwas <- snp.gwas2$z[gwas_idx]

        # drop = FALSE keeps a one-SNP model as a 1-row table, so t(geno)
        # still has one column per SNP.
        geno <- snp.1kg.eur2[match(pos, map.1kg.eur2$POS), , drop = FALSE]
        res[j, ] <- MWAS(gwas, weight, t(geno))
    }
    res
}

# mwas by models of all samples
mwas.all <- run_mwas(models.all)

# mwas by models of EA samples
mwas.ea <- run_mwas(models.ea)

# output models and mwas results
# NOTE: `k` is the index left over from the model-building loop, so the
# file names carry the last window size that was processed.
outf <- paste0(outd,"/models-a7-covnew.all.wind.",wind[k])
write.csv(models.all,outf)
outf <- paste0(outd,"/models-a7-covnew.ea.wind.",wind[k])
write.csv(models.ea,outf)
outf <- paste0(outd,"/mwas-a7-covnew.all.wind.",wind[k])
write.csv(mwas.all,outf)
outf <- paste0(outd,"/mwas-a7-covnew.ea.wind.",wind[k])
write.csv(mwas.ea,outf)

In [None]:
mwas.all

We get the same results with the old SNPs.

## Compare with results from CpGWAS

In [None]:
# results <- fread("16a9par-OUT_stage2_MWAS_scz.csv")

# results <- results[which(results$chr == 7 & results$pos >= 1987413 & results$pos <= 1988332), ]

# results <- results[which(results$population == "EA" & results$region == "caud"), ]

# head(results)