In [3]:
using Pkg
#Pkg.add("Profile")
Pkg.instantiate()

In [4]:
#= dependencies = ["DelimitedFiles", "LinearAlgebra", "Optim", "Distributions", "Test",
                "BenchmarkTools", "Random", "CSV", "DataFrames", "Distributed", "Plots"];
Pkg.add(dependencies)=#

In [5]:
using DelimitedFiles
using LinearAlgebra
using Optim
using Distributions
using Test
using BenchmarkTools
using Random
using CSV
using DataFrames
using Distributed
using Plots
using Profile

In [6]:
pwd()

"/home/zyu20/git/BulkLMM.jl/analysis"

In [7]:
## Include the source code of BulkLMM to be tested:
include("../src/scan.jl");
include("../src/lmm.jl");
include("../src/wls.jl");
include("../src/util.jl");
include("../src/kinship.jl");
include("../src/readData.jl");
include("../src/scan_for_tests.jl")

## Also include the helper functions for writing tests:
include("../test/testHelper.jl");

In [8]:
## Read in BXD data:
pheno_file = "../data/bxdData/BXDtraits.csv"
pheno = readBXDpheno(pheno_file);
geno_file = "../data/bxdData/BXDgeno_prob.csv"
geno = readGenoProb_ExcludeComplements(geno_file);

kinship = calcKinship(geno); # calculate kinship matrix from genotype data

In [9]:
nperms = 1024; # number of permutated copies required;
m = size(pheno, 2);
p = size(geno, 2); # number of markers

In [10]:
## Consider the 7919-th trait
pheno_y = reshape(pheno[:, 7919], :, 1);

## Do the same for many traits (first 1000):

In [11]:
# Function that extracts the row maximums for a matrix:
function getMaxLod_manyTraits(pheno::Array{Float64, 2}, geno::Array{Float64, 2}, kinship::Array{Float64, 2};
                              ntraits::Int64 = 1000, nperms::Int64 = 1024, original::Bool = true)
    
    if original
        nrows = nperms+1
    else
        nrows = nperms
    end
    
    maxLods_manyTraits = Matrix{Float64}(undef, nrows, ntraits);
    
    for k in 1:ntraits
        
        pheno_k = reshape(pheno[:, k], :, 1);
        results = scan_perms(pheno_k, geno, kinship; nperms = nperms)
        
        maxLods_manyTraits[:, k] = getMaxLod(results)
    end
    
    return maxLods_manyTraits; 
end

function getMaxLod_manyTraits2(pheno::Array{Float64, 2}, geno::Array{Float64, 2}, kinship::Array{Float64, 2};
                              ntraits::Int64 = 1000, nperms::Int64 = 1024, original::Bool = true)
    
    if original
        nrows = nperms+1
    else
        nrows = nperms
    end
    
    maxLods_manyTraits = Matrix{Float64}(undef, nrows, ntraits);
    
    for k in 1:ntraits
        
        pheno_k = reshape(pheno[:, k], :, 1);
        results = scan_perms2(pheno_k, geno, kinship; nperms = nperms)
        
        maxLods_manyTraits[:, k] = getMaxLod(results)
    end
    
    return maxLods_manyTraits; 
end


function getMaxLod(results::Array{Float64, 2})
    
    return mapslices(x -> maximum(x), results; dims = 2);
    
end

getMaxLod (generic function with 1 method)

In [12]:
@benchmark scan_perms(pheno_y, geno, kinship; nperms = 1024) samples = 10 # old way of doing it (in scan.jl)

BenchmarkTools.Trial: 2 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m4.452 s[22m[39m … [35m   4.710 s[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m4.68% … 7.94%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m4.581 s               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m6.36%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m4.581 s[22m[39m ± [32m182.277 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m6.36% ± 2.31%

  [34m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m 
  [34m█[39m[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁[39m

In [13]:
@benchmark scan_perms2(pheno_y, geno, kinship; nperms = 1024) samples = 10 # new way of doing it (in scan_for_tests.jl)

BenchmarkTools.Trial: 2 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m3.350 s[22m[39m … [35m   3.722 s[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 6.82% … 15.91%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m3.536 s               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m11.60%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m3.536 s[22m[39m ± [32m262.629 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m11.60% ±  6.43%

  [34m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m 
  [34m█[39m[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁

In [14]:
ntraits_list = [1, 10, 100, 1000];
nperms_list = [0, 10, 100, 1000];

runtimes_list1 = Array{Float64, 2}(undef, length(ntraits_list), length(nperms_list));
memorys_list1 = Array{Float64, 2}(undef, length(ntraits_list), length(nperms_list));
runtimes_list2 = Array{Float64, 2}(undef, length(ntraits_list), length(nperms_list));
memorys_list2 = Array{Float64, 2}(undef, length(ntraits_list), length(nperms_list));

In [None]:
for i in 1:length(ntraits_list)
    
    ntraits = ntraits_list[i];
    
    for j in 1:length(nperms_list)
        
        nperms = nperms_list[j];
        
        b1 = @benchmark getMaxLod_manyTraits(pheno, geno, kinship; ntraits = $ntraits, nperms = $nperms);
        b1 = median(b1);
        
        b2 = @benchmark getMaxLod_manyTraits2(pheno, geno, kinship; ntraits = $ntraits, nperms = $nperms);
        b2 = median(b2);
        
        runtimes_list1[i, j] = time(b1)/1e9;
        memorys_list1[i, j] = memory(b1)/(1024*1024*1024);
        
        runtimes_list2[i, j] = time(b2)/1e9;
        memorys_list2[i, j] = memory(b2)/(1024*1024*1024);
    
    end
end

In [None]:
runtimes_list1

In [None]:
runtimes_list2

In [None]:
memorys_list1

In [None]:
memorys_list2