In [1]:
using Pkg
# Pkg.add("Profile")
# Pkg.instantiate()

In [2]:
#= dependencies = ["DelimitedFiles", "LinearAlgebra", "Optim", "Distributions", "Test",
                "BenchmarkTools", "Random", "CSV", "DataFrames", "Distributed", "Plots"];
Pkg.add(dependencies) =#

In [3]:
using DelimitedFiles
using LinearAlgebra
using Optim
using Distributions
using Test
using BenchmarkTools
using Random
using CSV
using DataFrames
using Plots
using Profile
using Distributed

In [4]:
pwd()

"/home/xyu/github/BulkLMM.jl/analysis/BXD"

In [5]:
cd("..")

In [15]:
## Load each BulkLMM source file under test, one after another in this fixed order:
for src_path in (
    "../src/scan.jl",
    "../src/lmm.jl",
    "../src/wls.jl",
    "../src/util.jl",
    "../src/kinship.jl",
    "../src/readData.jl",
    "../src/parallel_helpers.jl",
)
    include(src_path)
end
## Then load the helper functions used when writing tests:
include("../test/testHelper.jl");

In [16]:
## Read in BXD data:
# Path of the BXD traits CSV, relative to the current working directory.
pheno_file = "../data/bxdData/BXDtraits.csv"
# Phenotype table; later cells take its column count and select one column as a trait.
pheno = readBXDpheno(pheno_file);
# Path of the BXD genotype-probability CSV, relative to the current working directory.
geno_file = "../data/bxdData/BXDgeno_prob.csv"
# Genotype probabilities. Judging by the reader's name, complementary probability
# columns are dropped on load — TODO confirm against readData.jl.
geno = readGenoProb_ExcludeComplements(geno_file);

kinship = calcKinship(geno); # calculate kinship matrix from genotype data

In [17]:
# m = number of columns of pheno (35556 in the recorded run).
m = size(pheno, 2);
# n = rows of geno, p = columns of geno (79 and 7321 in the recorded run).
# p is presumably the marker count and n the sample count — TODO confirm.
(n, p) = size(geno); # (rows, columns) of the genotype matrix

In [18]:
## Consider the 7919-th trait
pheno_y = reshape(pheno[:, 7919], :, 1);

In [19]:
n_seeds = 20;

In [20]:
using StatsBase

In [21]:
seeds_list = StatsBase.sample(1:100, n_seeds; replace = false);

In [22]:
(y0, X0, lambda0) = transform_rotation(pheno_y, geno, kinship);

In [23]:
(r0, X00) = transform_reweight(y0, X0, lambda0; reml = false);

In [26]:
#addprocs(10);

In [27]:
workers()

10-element Vector{Int64}:
  2
  3
  4
  5
  6
  7
  8
  9
 10
 11

In [28]:
# rmprocs(workers())

In [30]:
@everywhere begin
    # Packages that must be loaded on every process before the sources are evaluated.
    using Random, LinearAlgebra, Optim, Distributed

    ## Evaluate the BulkLMM source files under test on each process, in this order:
    for src_path in (
        "../src/scan.jl",
        "../src/lmm.jl",
        "../src/wls.jl",
        "../src/util.jl",
        "../src/kinship.jl",
        "../src/readData.jl",
        "../src/parallel_helpers.jl",
    )
        include(src_path)
    end
end

In [32]:
# Distributed permutation scan using the "by blocks" option with 1000 blocks.
LODs_by_blocks = scan_perms_distributed(
    pheno_y, geno, kinship;
    reml = false,
    nperms = 1000,
    rndseed = 0,
    original = true,
    option = "by blocks",
    nblocks = 1000,
);

In [33]:
# Non-distributed permutation scan with seed 0, kept as the reference result.
LODs_ordinary = scan_perms(
    pheno_y, geno, kinship;
    reml = false,
    nperms = 1000,
    rndseed = 0,
    original = true,
);

In [34]:
# Distributed permutation scan using the "by nperms" option with 10 copies.
# No rndseed keyword is passed in this call.
LODs_by_nperms = scan_perms_distributed(
    pheno_y, geno, kinship;
    reml = false,
    nperms = 1000,
    original = true,
    option = "by nperms",
    ncopies = 10,
);

In [35]:
LODs_by_blocks[1:6, :]

6×7321 Matrix{Float64}:
 0.00819636  0.00819636  0.00819636  …  0.0449878  0.0128283  0.0128283
 0.604956    0.604956    0.604956       1.33928    1.29426    1.29426
 0.0264842   0.0264842   0.0264842      0.16927    0.069518   0.069518
 0.00585467  0.00585467  0.00585467     0.589608   0.14854    0.14854
 0.477297    0.477297    0.477297       0.0317133  0.0625366  0.0625366
 0.140293    0.140293    0.140293    …  0.0400303  0.0338204  0.0338204

In [36]:
LODs_ordinary[1:6, :]

6×7321 Matrix{Float64}:
 0.00819636  0.00819636  0.00819636  …  0.0449878  0.0128283  0.0128283
 0.604956    0.604956    0.604956       1.33928    1.29426    1.29426
 0.0264842   0.0264842   0.0264842      0.16927    0.069518   0.069518
 0.00585467  0.00585467  0.00585467     0.589608   0.14854    0.14854
 0.477297    0.477297    0.477297       0.0317133  0.0625366  0.0625366
 0.140293    0.140293    0.140293    …  0.0400303  0.0338204  0.0338204

In [37]:
LODs_by_nperms[1:6, :]

6×7321 Matrix{Float64}:
 0.00819636  0.00819636  0.00819636  0.00819636  …  0.0128283    0.0128283
 0.152739    0.152739    0.152739    0.152739       0.0338036    0.0338036
 0.377097    0.377097    0.377097    0.377097       0.317929     0.317929
 0.0185493   0.0185493   0.0185493   0.0185493      0.381719     0.381719
 0.00721091  0.00721091  0.00721091  0.00721091     0.000609896  0.000609896
 0.00395086  0.00395086  0.00395086  0.00395086  …  0.0105481    0.0105481

In [38]:
maxSqDiff(LODs_by_blocks, LODs_ordinary)

7.888609052210118e-29

In [39]:
p

7321

In [40]:
floor(Int, 11/3)

3

In [41]:
11%3

2

In [42]:
7321%150

121

In [43]:
createBlocks(p, ceil(Int, p/150))

150-element Vector{UnitRange{Int64}}:
 1:49
 50:98
 99:147
 148:196
 197:245
 246:294
 295:343
 344:392
 393:441
 442:490
 491:539
 540:588
 589:637
 ⋮
 6763:6811
 6812:6860
 6861:6909
 6910:6958
 6959:7007
 7008:7056
 7057:7105
 7106:7154
 7155:7203
 7204:7252
 7253:7301
 7302:7321

In [44]:
scan_perms_distributed(pheno_y, geno, kinship; reml = false, nperms = 150, rndseed = 0, original = true, 
    option = "by blocks", nblocks = 150)

151×7321 Matrix{Float64}:
 0.00819636  0.00819636  0.00819636  0.00819636  …  0.0128283    0.0128283
 0.604956    0.604956    0.604956    0.604956       1.29426      1.29426
 0.0264842   0.0264842   0.0264842   0.0264842      0.069518     0.069518
 0.00585467  0.00585467  0.00585467  0.00585467     0.14854      0.14854
 0.477297    0.477297    0.477297    0.477297       0.0625366    0.0625366
 0.140293    0.140293    0.140293    0.140293    …  0.0338204    0.0338204
 0.45141     0.45141     0.45141     0.45141        0.278652     0.278652
 0.179818    0.179818    0.179818    0.179818       0.0218039    0.0218039
 0.213351    0.213351    0.213351    0.213351       0.193549     0.193549
 0.0250041   0.0250041   0.0250041   0.0250041      0.00622826   0.00622826
 0.801668    0.801668    0.801668    0.801668    …  0.123412     0.123412
 0.0351507   0.0351507   0.0351507   0.0351507      0.0480549    0.0480549
 0.053255    0.053255    0.053255    0.053255       0.11254      0.11254
 ⋮      

In [45]:
# Benchmark the "by blocks" distributed permutation scan (150 permutations, 150 blocks).
# The global inputs are interpolated with `$` so BenchmarkTools times the call on
# concrete values instead of on untyped global-variable lookups.
@benchmark scan_perms_distributed($pheno_y, $geno, $kinship; reml = false, nperms = 150,
    rndseed = 0, original = true, option = "by blocks", nblocks = 150)

BenchmarkTools.Trial: 18 samples with 1 evaluation.
 Range (min … max):  270.318 ms … 315.843 ms  ┊ GC (min … max): 0.00% … 0.00%
 Time  (median):     279.174 ms               ┊ GC (median):    0.00%
 Time  (mean ± σ):   287.181 ms ±  14.976 ms  ┊ GC (mean ± σ):  0.20% ± 0.45%

  [39m [39m [39m [39m [39m [39m [39m█[39m [39m [39m▃[39m [39m [34m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m▇[39m▁[39m▁[39m▇

In [46]:
# Benchmark the "by blocks" distributed permutation scan (1000 permutations, 150 blocks).
# The global inputs are interpolated with `$` so BenchmarkTools times the call on
# concrete values instead of on untyped global-variable lookups.
@benchmark scan_perms_distributed($pheno_y, $geno, $kinship; reml = false, nperms = 1000,
    rndseed = 0, original = true, option = "by blocks", nblocks = 150)

BenchmarkTools.Trial: 6 samples with 1 evaluation.
 Range (min … max):  801.650 ms … 951.926 ms  ┊ GC (min … max): 1.16% … 1.68%
 Time  (median):     821.979 ms               ┊ GC (median):    0.50%
 Time  (mean ± σ):   843.708 ms ±  54.664 ms  ┊ GC (mean ± σ):  0.66% ± 0.74%

  [39m▁[39m [39m [39m [39m [39m [39m [39m▁[34m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [32m▁[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m [39m 
  [39m█[39m▁[39m▁[39m▁

In [47]:
# Benchmark the non-distributed permutation scan (1000 permutations). The global inputs
# are interpolated with `$` so the timing excludes untyped global-variable access.
@benchmark scan_perms($pheno_y, $geno, $kinship; reml = false, nperms = 1000,
    rndseed = 0, original = true)

BenchmarkTools.Trial: 2 samples with 1 evaluation.
 Range (min … max):  2.598 s …    2.816 s  ┊ GC (min … max): 5.32% … 6.68%
 Time  (median):     2.707 s               ┊ GC (median):    6.03%
 Time  (mean ± σ):   2.707 s ± 154.488 ms  ┊ GC (mean ± σ):  6.03% ± 0.96%

  [34m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m 
  [34m█[39m[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁[39m

In [48]:
# Benchmark a single scan without permutations. The global inputs are interpolated
# with `$` so the timing excludes untyped global-variable access.
@benchmark scan_null($pheno_y, $geno, $kinship)

BenchmarkTools.Trial: 112 samples with 1 evaluation.
 Range (min … max):  27.908 ms … 100.228 ms  ┊ GC (min … max):  0.00% … 62.44%
 Time  (median):     40.179 ms               ┊ GC (median):     0.00%
 Time  (mean ± σ):   44.629 ms ±  18.538 ms  ┊ GC (mean ± σ):  18.37% ± 22.57%

  [39m▄[39m [39m▁[39m [39m [39m [39m [39m [39m [39m▁[39m [34m█[39m[39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m▁[39m [39m 
  [39m█[39m▆[39m█[39m▁[3

In [49]:
m

35556

In [50]:
?transform_permute

search: transform_permute transform_bxd_pheno_to_gemma



No documentation found.

`transform_permute` is a `Function`.

```
# 1 method for generic function "transform_permute":
[1] transform_permute(r0::Matrix{Float64}; nperms, rndseed, original) in Main at /home/xyu/github/BulkLMM.jl/src/parallel_helpers.jl:175
```


In [51]:
test_perm = transform_permute(r0; nperms = 10, rndseed = 0, original = true)

79×11 Matrix{Float64}:
 -0.42257    -0.399306   0.106865   …  -0.528949    0.54331    -0.430699
 -1.48199     0.200302  -0.327519       0.207851   -1.44553    -0.220243
 -1.44553     0.111675   0.699129       0.511444   -0.119937   -0.137428
  0.699129    0.812863  -0.131841      -0.220243    0.768221   -1.39413
  1.27625     1.12818    0.31017       -0.137428    1.27625     0.404775
  0.425936   -0.220243  -1.27416    …  -0.430699    0.511444   -0.156672
 -0.365797   -0.463753   0.768221      -0.641664    0.574367    1.27625
 -0.0704027  -0.746313  -0.365797       0.517884    0.517884   -0.399306
 -0.703862   -0.288931   0.0414604      0.617305   -0.746313    0.699129
 -0.653201    0.101631  -0.746313       0.0543167  -0.137428   -0.365797
 -0.0971895  -0.580751  -1.13838    …  -0.746313    0.0414604  -1.48199
  0.225635    0.54331   -0.883433      -0.365797   -0.365797    0.656254
  0.171204    0.106865  -1.44553        0.106865   -0.430699   -0.313615
  ⋮                            

In [52]:
test_univar = scan_perms_distributed(pheno_y, geno, kinship; reml = false, nperms = 10, rndseed = 0, original = true, 
    option = "by blocks", nblocks = 150)

11×7321 Matrix{Float64}:
 0.00819636  0.00819636  0.00819636  …  0.0449878  0.0128283   0.0128283
 0.604956    0.604956    0.604956       1.33928    1.29426     1.29426
 0.0264842   0.0264842   0.0264842      0.16927    0.069518    0.069518
 0.00585467  0.00585467  0.00585467     0.589608   0.14854     0.14854
 0.477297    0.477297    0.477297       0.0317133  0.0625366   0.0625366
 0.140293    0.140293    0.140293    …  0.0400303  0.0338204   0.0338204
 0.45141     0.45141     0.45141        0.459152   0.278652    0.278652
 0.179818    0.179818    0.179818       0.0430329  0.0218039   0.0218039
 0.213351    0.213351    0.213351       0.179031   0.193549    0.193549
 0.0250041   0.0250041   0.0250041      0.0679043  0.00622826  0.00622826
 0.801668    0.801668    0.801668    …  0.42936    0.123412    0.123412

In [53]:
scan_perms(pheno_y, geno, kinship; nperms = 10, rndseed = 0, original = true)

11×7321 Matrix{Float64}:
 0.00819636  0.00819636  0.00819636  …  0.0449878  0.0128283   0.0128283
 0.604956    0.604956    0.604956       1.33928    1.29426     1.29426
 0.0264842   0.0264842   0.0264842      0.16927    0.069518    0.069518
 0.00585467  0.00585467  0.00585467     0.589608   0.14854     0.14854
 0.477297    0.477297    0.477297       0.0317133  0.0625366   0.0625366
 0.140293    0.140293    0.140293    …  0.0400303  0.0338204   0.0338204
 0.45141     0.45141     0.45141        0.459152   0.278652    0.278652
 0.179818    0.179818    0.179818       0.0430329  0.0218039   0.0218039
 0.213351    0.213351    0.213351       0.179031   0.193549    0.193549
 0.0250041   0.0250041   0.0250041      0.0679043  0.00622826  0.00622826
 0.801668    0.801668    0.801668    …  0.42936    0.123412    0.123412

In [54]:
X00

79×7321 Matrix{Float64}:
  0.285431     0.285431     0.285431    …  -0.263737    -0.263737
  0.831539     0.831539     0.831539       -0.0785915   -0.0785915
 -1.66534     -1.66534     -1.66534        -0.200825    -0.200825
 -0.108772    -0.108772    -0.108772       -0.595318    -0.595318
  0.303485     0.303485     0.303485       -0.0773292   -0.0773292
  0.120631     0.120631     0.120631    …   0.0323032    0.0323032
 -0.120643    -0.120643    -0.120643        0.212935     0.212935
  0.165916     0.165916     0.165916        0.477606     0.477606
 -0.00450792  -0.00450792  -0.00450792     -1.36891     -1.36891
 -0.447304    -0.447304    -0.447304       -0.229852    -0.229852
 -0.0323028   -0.0323028   -0.0323028   …  -0.43835     -0.43835
  0.18168      0.18168      0.18168        -0.578258    -0.578258
 -0.438445    -0.438445    -0.438445       -0.66382     -0.66382
  ⋮                                     ⋱                ⋮
  1.09159      1.09159      1.09159         0.433778     0

In [55]:
# colStandardize!(X00)

In [56]:
# colStandardize!(test_perm)

In [57]:
# NOTE: in the recorded run this cell fails with `UndefVarError: r2lod not defined`,
# because `r2lod` is only defined in a later cell; evaluate that definition first.
# The call passes the correlation between the first column of test_perm and the first
# column of X00, together with n, to r2lod.
r2lod(cor(test_perm[:, 1], X00[:, 1]), n)

LoadError: UndefVarError: r2lod not defined

In [58]:
"""
    r2lod(r::Real, n::Integer)

Convert a correlation coefficient `r`, computed from `n` observations, into a LOD score
via `LOD = -(n/2) * log10(1 - r^2)`.

The leading minus sign matters: `1 - r^2` lies in `[0, 1]` for a valid correlation, so
its base-10 logarithm is never positive and the LOD score is never negative.

# Arguments
- `r`: correlation coefficient, expected to satisfy `-1 <= r <= 1`.
- `n`: number of observations the correlation was computed from.

# Returns
The LOD score as a floating-point number; `Inf` when `abs(r) == 1`.

# Throws
- `DomainError` (raised by `log10`) when `abs(r) > 1`.
"""
function r2lod(r::Real, n::Integer)
    return -(n / 2) * log10(1 - r^2)
end

r2lod (generic function with 1 method)

In [59]:
test_r2 = transpose(test_perm)*X00

11×7321 Matrix{Float64}:
 -0.736914  -0.736914  -0.736914  …  -1.88829  -1.00027   -1.00027
  6.27628    6.27628    6.27628      10.1116    9.8625     9.8625
 -1.32429   -1.32429   -1.32429      -3.65616  -2.32662   -2.32662
 -0.622834  -0.622834  -0.622834      6.78214   3.39703    3.39703
 -5.5852    -5.5852    -5.5852       -1.58572  -2.20693   -2.20693
 -3.04291   -3.04291   -3.04291   …  -1.78134  -1.62365   -1.62365
  5.43366    5.43366    5.43366      -5.99632  -4.64393   -4.64393
  3.443      3.443      3.443         1.84686   1.3039     1.3039
 -3.74849   -3.74849   -3.74849      -3.75957  -3.87515   -3.87515
  1.28678    1.28678    1.28678       2.31913   0.697044   0.697044
  7.20446    7.20446    7.20446   …  -5.80103  -3.09752   -3.09752

In [60]:
test_r2 .^2

11×7321 Matrix{Float64}:
  0.543042   0.543042   0.543042   0.543042  …    3.56564   1.00055   1.00055
 39.3917    39.3917    39.3917    39.3917       102.245    97.2689   97.2689
  1.75375    1.75375    1.75375    1.75375       13.3675    5.41315   5.41315
  0.387922   0.387922   0.387922   0.387922      45.9974   11.5398   11.5398
 31.1945    31.1945    31.1945    31.1945         2.51451   4.87052   4.87052
  9.25928    9.25928    9.25928    9.25928   …    3.17318   2.63623   2.63623
 29.5247    29.5247    29.5247    29.5247        35.9559   21.5661   21.5661
 11.8543    11.8543    11.8543    11.8543         3.4109    1.70017   1.70017
 14.0512    14.0512    14.0512    14.0512        14.1343   15.0168   15.0168
  1.65581    1.65581    1.65581    1.65581        5.37837   0.48587   0.48587
 51.9042    51.9042    51.9042    51.9042    …   33.6519    9.59462   9.59462

In [61]:
n

79

In [62]:
# NOTE(review): in the recorded run this cell throws a DomainError. `test_r2` holds the
# raw products transpose(test_perm) * X00 (entries such as 6.27628), not correlations
# in [-1, 1], so 1 - x^2 becomes negative (1 - 6.27628^2 ≈ -38.39) and log10 rejects it.
# The inputs presumably need to be column-standardized and the products scaled by n
# first, and a LOD score normally carries a leading minus sign — TODO confirm against
# the package's scan code before relying on this expression.
test_lods_liteQTL = mapslices(x -> n/2 * log10.(1 .- x.^2), test_r2; dims = 1)

LoadError: DomainError with -38.39174317162547:
log10 will only return a complex result if called with a complex argument. Try log10(Complex(x)).

In [63]:
?map

search: map map! mapc mapcols mapfoldr mapfoldl mapcols! mapslices mapreduce



```
map(f, c...) -> collection
```

Transform collection `c` by applying `f` to each element. For multiple collection arguments, apply `f` elementwise, and stop when when any of them is exhausted.

See also [`map!`](@ref), [`foreach`](@ref), [`mapreduce`](@ref), [`mapslices`](@ref), [`zip`](@ref), [`Iterators.map`](@ref).

# Examples

```jldoctest
julia> map(x -> x * 2, [1, 2, 3])
3-element Vector{Int64}:
 2
 4
 6

julia> map(+, [1, 2, 3], [10, 20, 30, 400, 5000])
3-element Vector{Int64}:
 11
 22
 33
```

---

```
map(f, A::AbstractArray...) -> N-array
```

When acting on multi-dimensional arrays of the same [`ndims`](@ref), they must all have the same [`axes`](@ref), and the answer will too.

See also [`broadcast`](@ref), which allows mismatched sizes.

# Examples

```
julia> map(//, [1 2; 3 4], [4 3; 2 1])
2×2 Matrix{Rational{Int64}}:
 1//4  2//3
 3//2  4//1

julia> map(+, [1 2; 3 4], zeros(2,1))
ERROR: DimensionMismatch

julia> map(+, [1 2; 3 4], [1,10,100,1000], zeros(3,1))  # iterates until 3rd is exhausted
3-element Vector{Float64}:
   2.0
  13.0
 102.0
```

---

```
map(f, x::PooledArray; pure::Bool=false)
```

Transform `PooledArray` `x` by applying `f` to each element.

If `pure=true` then `f` is applied to each element of pool of `x` exactly once (even if some elements in pool are not present it `x`). This will typically be much faster when the proportion of unique values in `x` is small.

If `pure=false`, the returned array will use the same reference type as `x`, or `Int` if the number of unique values in the result is too large to fit in that type.
