# Analyzing a toy example with `varbvs` and `mr-ash`

In this example we analyze a data set in which multiple SNPs are potentially associated with gene expression. We use both the `varbvs` and `mr-ash` methods, initializing both of them with results from `lasso`.

## Utility functions


In [1]:
autoselect.mixsd = function(betahat,sebetahat,mult = sqrt(2)){
        # Choose a grid of mixture standard deviations from effect estimates
        # and their standard errors.
        #
        # betahat:   numeric vector of effect estimates.
        # sebetahat: numeric vector of standard errors, same length as betahat.
        # mult:      multiplicative spacing between consecutive grid points;
        #            mult == 0 requests the two-point grid c(0, sigmaamax/2).
        #
        # Returns a numeric vector of standard deviations whose largest
        # value is sigmaamax (or c(0, sigmaamax/2) when mult == 0).

        # Drop observations with an exactly-zero standard error (usually the
        # result of a mistake). The same mask is applied to betahat so the two
        # vectors stay aligned; filtering sebetahat alone would make the
        # comparisons below recycle vectors of different lengths.
        keep = sebetahat != 0
        betahat = betahat[keep]
        sebetahat = sebetahat[keep]
        # so that the minimum is small compared with measurement precision
        sigmaamin = min(sebetahat)/10
        if (all(betahat^2 <= sebetahat^2)) {
            # to deal with the occasional odd case where this could happen; 8 is arbitrary
            sigmaamax = 8*sigmaamin
        } else {
            # this computes a rough largest value you'd want to use,
            # based on idea that sigmaamax^2 + sebetahat^2 should be at least betahat^2
            sigmaamax = 2*sqrt(max(betahat^2-sebetahat^2))
        }
        if (mult == 0) {
            return(c(0,sigmaamax/2))
        } else {
            # number of multiplicative steps needed to get from sigmaamax
            # down to (at most) sigmaamin
            npoint = ceiling(log2(sigmaamax/sigmaamin)/log2(mult))
            return(mult^((-npoint):0) * sigmaamax)
        }
    }
univariate_regression = function(X,y,Z = NULL){
        # Fit a separate simple linear regression (with intercept) of y on
        # each column of X.
        #
        # X: matrix with one predictor per column.
        # y: response vector.
        # Z: optional covariates; when supplied, y is first replaced by the
        #    residuals of lm(y ~ Z).
        #
        # Returns a list with
        #   betahat:   slope estimate for each column of X,
        #   sebetahat: standard error of each slope,
        #   residuals: the response actually regressed on X (y itself when
        #              Z is NULL, otherwise the residuals of y on Z).
        P = dim(X)[2]
        if (!is.null(Z)) {
            y = lm(y~Z)$residuals
        }
        output = matrix(0,nrow = P,ncol = 2)
        # seq_len() so that a zero-column X yields empty results instead of
        # iterating over c(1, 0)
        for (i in seq_len(P)) {
          coefs = summary(lm(y ~ X[,i]))$coefficients
          # summary.lm drops aliased coefficients, so a constant column has no
          # slope row; its estimate and standard error are left at 0.
          if (nrow(coefs) >= 2) {
            output[i,] = coefs[2,1:2]
          }
        }
        return(list(betahat = output[,1], sebetahat = output[,2],
                    residuals = y))
    }
lasso_reorder = function(X, y) {
        # Perform lasso regression (no intercept) and order the regressors by
        # "importance": variables are ranked by the first point along the
        # glmnet path at which their coefficient is non-zero; variables that
        # are never non-zero are appended in their original order.
        #
        # X: predictor matrix.
        # y: response vector.
        #
        # Returns an integer vector containing every column index of X once.
        fit.glmnet <- glmnet::glmnet(X, y, intercept = FALSE)
        # drop the intercept row; rows are variables, columns are path steps
        beta_path = coef(fit.glmnet)[-1, , drop = FALSE]
        K = dim(beta_path)[2]
        path_order = integer(0)
        for (k in seq_len(K)) {
            crt_path = unname(which(beta_path[,k] != 0))
            # setdiff() rather than crt_path[-which(...)]: negative indexing
            # with a zero-length index selects nothing, which would discard
            # crt_path whenever none of it had been seen before.
            path_order = c(path_order, setdiff(crt_path, path_order))
        }
        # Same reasoning for the never-selected variables: setdiff() still
        # returns all indices when path_order is empty.
        index_order = c(path_order, setdiff(seq_len(dim(beta_path)[1]), path_order))
        return(index_order)
    }

# Fitted values for a varbvsmix fit.
# X:   predictor matrix; Z: covariate matrix (without the intercept column).
# fit: list with matrices `alpha` and `mu` (same shape) and vector `mu.cov`,
#      whose first entry is used as the intercept and the rest as the
#      coefficients on Z.
# Returns the one-column matrix X %*% bhat + intercept + Z %*% covariate effects.
fitfv = function(X,Z,fit){
  # per-variable coefficient: row-wise sum of alpha * mu
  coef_mean = rowSums(fit$alpha * fit$mu)
  intercept = fit$mu.cov[1]
  cov_coef = fit$mu.cov[-1]
  fitted = X %*% coef_mean + intercept + Z %*% cov_coef
  return(fitted)
}

## Core analysis

In [5]:
# Load the data set (path is relative to the working directory).
input_file = "../data/Thyroid.FMO2.1Mb.RDS"
dat = readRDS(input_file)
X = as.matrix(dat$X)
# Remove columns whose sum is zero; drop = FALSE keeps X a matrix even if
# only one column survives.
X = X[, which(colSums(X) != 0), drop = FALSE]
storage.mode(X) <- "double"
y = as.vector(dat$y)
Z = as.matrix(dat$Z)
# univariate results (y is residualized on Z inside univariate_regression)
initial = univariate_regression(X, y, Z)
# grid of mixture standard deviations derived from the univariate estimates
mixsd = autoselect.mixsd(initial$betahat, initial$sebetahat)
# Initial values for varbvsmix: one row per variable, one column per mixture
# component (the zero-variance component plus length(mixsd) others).
mu_zero = matrix(0, ncol = length(mixsd)+1, nrow = ncol(X))
alpha_zero = matrix(1/ncol(X), ncol = length(mixsd)+1,nrow = ncol(X))
# the first component takes the remaining mass so that each row sums to 1
alpha_zero[,1] = 1 - length(mixsd) / ncol(X)
# column order from the lasso path fitted to the Z-adjusted response
index_order = lasso_reorder(X, initial$residuals)
# varbvsmix with lasso-ordered columns (fit11) vs original column order (fit12)
fit11 = varbvs::varbvsmix(X[, index_order],
                         Z, y, sa = c(0,mixsd^2),
                         mu = mu_zero,
                         alpha = alpha_zero,
                         verbose = FALSE)
fit12 = varbvs::varbvsmix(X,
                         Z, y, sa = c(0,mixsd^2),
                         mu = mu_zero,
                         alpha = alpha_zero,
                         verbose = FALSE)
# varbvs with lasso-ordered columns (fit21) vs original column order (fit22)
fit21 = varbvs::varbvs(X[, index_order], Z, y, verbose = FALSE)
fit22 = varbvs::varbvs(X, Z, y, verbose = FALSE)

## Check top hits with lasso init

In [10]:
# Five variables with the smallest lfsr in the lasso-ordered varbvsmix fit,
# shown as (1 - lfsr) next to the first column of the varbvs alpha matrix.
mix.top = names(sort(fit11$lfsr, decreasing = FALSE)[seq_len(5)])
cbind(1 - fit11$lfsr[mix.top],
      fit21$alpha[,1][mix.top])

0,1,2
chr1_171172098_C_T_b38,1.0,1.0
chr1_171122735_A_G_b38,1.0,0.005487233
chr1_171190872_G_A_b38,0.4840977,1.914644e-05
chr1_171219393_T_A_b38,0.4827524,2.446686e-05
chr1_171178705_A_G_b38,0.4825416,2.025885e-05


In [11]:
# Five variables with the largest values in the first column of the varbvs
# alpha matrix, shown next to (1 - lfsr) from the varbvsmix fit.
# NOTE(review): alpha[,1] is a single column of alpha; confirm this is the
# intended summary rather than an average over columns.
bvs.top = names(sort(fit21$alpha[,1], decreasing = TRUE)[1:5])
cbind(1 - fit11$lfsr[bvs.top], fit21$alpha[,1][bvs.top])

0,1,2
chr1_171172098_C_T_b38,1.0,0.99999999
chr1_171133158_A_G_b38,0.4573966,0.99999951
chr1_171199984_T_C_b38,0.4605322,0.99843224
chr1_171252314_G_C_b38,0.4608848,0.03124062
chr1_171150061_G_C_b38,0.4147678,0.01524914
