Skip to content

Commit

Permalink
Merge pull request #100 from learningMalanya/dev
Browse files Browse the repository at this point in the history
Modified the backend code of the function scan() to improve efficiency.
  • Loading branch information
GregFa committed Jun 14, 2024
2 parents b78d3ad + bd5eef3 commit d666790
Showing 1 changed file with 80 additions and 75 deletions.
155 changes: 80 additions & 75 deletions src/scan.jl
Original file line number Diff line number Diff line change
Expand Up @@ -227,16 +227,18 @@ function scan(y::Array{Float64,2}, g::Array{Float64,2}, covar::Array{Float64, 2}
end

if assumption == "null"
if permutation_test == true
results = scan_perms_lite(y_st, g_st, covar_st, K_st; prior_variance = prior_variance, prior_sample_size = prior_sample_size,
addIntercept = addIntercept, reml = reml, method = method, optim_interval = optim_interval,
nperms = nperms, rndseed = rndseed,
decomp_scheme = decomp_scheme, output_pvals = output_pvals);
else
results = scan_null(y_st, g_st, covar_st, K_st, [prior_variance, prior_sample_size], addIntercept;
reml = reml, method = method, optim_interval = optim_interval,
decomp_scheme = decomp_scheme, output_pvals = output_pvals, chisq_df = chisq_df);
end
if permutation_test == true # if the user requires to perform permutation testing:
if nperms <= 0
throw(error("For permutation testing, the required number of permuted copies has to be a positive integer!"));
end
else # if permutation testing is not required by the user, set the number of permuted copies to 0:
nperms = 0;
end

results = scan_perms_lite(y_st, g_st, covar_st, K_st; prior_variance = prior_variance, prior_sample_size = prior_sample_size,
addIntercept = addIntercept, reml = reml, method = method, optim_interval = optim_interval,
nperms = nperms, rndseed = rndseed,
decomp_scheme = decomp_scheme, output_pvals = output_pvals);
elseif assumption == "alt"
if permutation_test == true
error("Permutation test option currently is not supported for the alternative assumption.");
Expand Down Expand Up @@ -277,6 +279,8 @@ end
"""
scan_null(y, g, K, reml, method)
Warning: scan_null is
Performs genome scan for univariate trait and each of the gene markers, one marker at a time,
assuming the variance components are the same for all markers.
Expand Down Expand Up @@ -307,57 +311,61 @@ A list of output values are returned:
as `null` (default).
"""
function scan_null(y::Array{Float64, 2}, g::Array{Float64, 2}, covar::Array{Float64, 2}, K::Array{Float64, 2},
prior::Array{Float64, 1}, addIntercept::Bool;
reml::Bool = false, method::String = "qr", optim_interval::Int64 = 1,
decomp_scheme::String = "eigen",
# option for returning p-values results:
output_pvals::Bool = false, chisq_df::Int64 = 1)

# number of markers
(n, p) = size(g)

num_of_covar = addIntercept ? (size(covar, 2)+1) : size(covar, 2);

# rotate data
(y0, X0, lambda0) = transform_rotation(y, [covar g], K;
addIntercept = addIntercept, decomp_scheme = decomp_scheme)
X0_covar = X0[:, 1:num_of_covar];

if size(X0_covar, 2) == 1
X0_covar = reshape(X0_covar, :, 1);
end

# fit null lmm
out00 = fitlmm(y0, X0_covar, lambda0, prior; reml = reml, method = method, optim_interval = optim_interval)
# weights proportional to the variances
sqrtw = sqrt.(makeweights(out00.h2, lambda0))
# rescale by weights
rowMultiply!(y0, sqrtw)
rowMultiply!(X0, sqrtw)
rowMultiply!(X0_covar, sqrtw);

# perform genome scan
rss0 = rss(y0, X0_covar; method = method)[1]
lod = zeros(p)

X = X0[:, 1:(num_of_covar+1)]
for i = 1:p
X[:, (num_of_covar+1)] = X0[:, num_of_covar+i]
rss1 = rss(y0, X; method = method)[1]
lod[i] = (-n/2)*(log10(rss1) .- log10(rss0))
# lrt = (rss0 - rss1)/out00.sigma2
# lod[i] = lrt/(2*log(10))
end

if output_pvals
log10pvals = lod2log10p.(lod, chisq_df);
return (sigma2_e = out00.sigma2, h2_null = out00.h2, lod = lod, log10pvals = log10pvals)
else
return (sigma2_e = out00.sigma2, h2_null = out00.h2, lod = lod)
end

end
# Note: use of the `scan_null()` is now deprecated because of its computational inefficiency compared to using `scan_perms_lite()`,
# which performs faster single-trait scan by vectorized operations.
# The following source code of `scan_null()` may be removed in future releases.

# function scan_null(y::Array{Float64, 2}, g::Array{Float64, 2}, covar::Array{Float64, 2}, K::Array{Float64, 2},
# prior::Array{Float64, 1}, addIntercept::Bool;
# reml::Bool = false, method::String = "qr", optim_interval::Int64 = 1,
# decomp_scheme::String = "eigen",
# # option for returning p-values results:
# output_pvals::Bool = false, chisq_df::Int64 = 1)

# # number of markers
# (n, p) = size(g)

# num_of_covar = addIntercept ? (size(covar, 2)+1) : size(covar, 2);

# # rotate data
# (y0, X0, lambda0) = transform_rotation(y, [covar g], K;
# addIntercept = addIntercept, decomp_scheme = decomp_scheme)
# X0_covar = X0[:, 1:num_of_covar];

# if size(X0_covar, 2) == 1
# X0_covar = reshape(X0_covar, :, 1);
# end

# # fit null lmm
# out00 = fitlmm(y0, X0_covar, lambda0, prior; reml = reml, method = method, optim_interval = optim_interval)
# # weights proportional to the variances
# sqrtw = sqrt.(makeweights(out00.h2, lambda0))
# # rescale by weights
# rowMultiply!(y0, sqrtw)
# rowMultiply!(X0, sqrtw)
# rowMultiply!(X0_covar, sqrtw);

# # perform genome scan
# rss0 = rss(y0, X0_covar; method = method)[1]
# lod = zeros(p)

# X = X0[:, 1:(num_of_covar+1)]
# for i = 1:p
# X[:, (num_of_covar+1)] = X0[:, num_of_covar+i]
# rss1 = rss(y0, X; method = method)[1]
# lod[i] = (-n/2)*(log10(rss1) .- log10(rss0))
# # lrt = (rss0 - rss1)/out00.sigma2
# # lod[i] = lrt/(2*log(10))
# end

# if output_pvals
# log10pvals = lod2log10p.(lod, chisq_df);
# return (sigma2_e = out00.sigma2, h2_null = out00.h2, lod = lod, log10pvals = log10pvals)
# else
# return (sigma2_e = out00.sigma2, h2_null = out00.h2, lod = lod)
# end

# end

## re-estimate variance components under alternative

Expand Down Expand Up @@ -524,33 +532,30 @@ function scan_perms_lite(y::Array{Float64,2}, g::Array{Float64,2}, covar::Array{
prior_b = prior_sample_size,
reml = reml, method = method, optim_interval = optim_interval); # reweighting and taking residuals

# If no permutation testing is required, move forward to process the single original vector
if nperms < 0
throw(error("The required number of permutations must be a positive integer."));
else
r0perm = transform_permute(r0; nperms = nperms, rndseed = rndseed, original = true);
end

# Compute the matrix of pair-wise correlations between permuted copies and markers:
r0perm = transform_permute(r0; nperms = nperms, rndseed = rndseed, original = true);
norm_y = mapslices(x -> norm(x), r0perm, dims = 1) |> vec;

norm_X = mapslices(x -> norm(x), X00, dims = 1) |> vec;


colDivide!(r0perm, norm_y);
colDivide!(X00, norm_X);

L = X00' * r0perm
threaded_map!(r2lod, L, n);
L = X00' * r0perm # the matrix of correlations
threaded_map!(r2lod, L, n); # map elementwise-ly to compute LOD scores

lod = L[:, 1]; # lod scores for the original trait;
L_perms = L[:, 2:end]; # lod scores for the permuted copies of the original, excluding the lod scores for the original trait

if output_pvals
log10pvals = lod2log10p.(lod, chisq_df);
if nperms == 0 # if no permutation is required, return results only for the input trait
return (sigma2_e = sigma2_e, h2_null = h2_null, lod = lod, log10pvals = log10pvals)
end
log10Pvals_perms = lod2log10p.(L_perms, chisq_df);
return (sigma2_e = sigma2_e, h2_null = h2_null, lod = lod, log10pvals = pvals,
return (sigma2_e = sigma2_e, h2_null = h2_null, lod = lod, log10pvals = log10pvals,
L_perms = L_perms, log10Pvals_perms = log10Pvals_perms)
else
if nperms == 0 # if no permutation is required, return results only for the input trait
return (sigma2_e = sigma2_e, h2_null = h2_null, lod = lod)
end
return (sigma2_e = sigma2_e, h2_null = h2_null, lod = lod, L_perms = L_perms)
end

Expand Down

0 comments on commit d666790

Please sign in to comment.