In [16]:
using Pkg
# Pkg.add("Profile")
# Pkg.instantiate()

In [17]:
#= dependencies = ["DelimitedFiles", "LinearAlgebra", "Optim", "Distributions", "Test",
                "BenchmarkTools", "Random", "CSV", "DataFrames", "Distributed", "Plots"];
Pkg.add(dependencies) =#

In [18]:
using DelimitedFiles
using LinearAlgebra
using Optim
using Distributions
using Test
using BenchmarkTools
using Random
using CSV
using DataFrames
using Plots
using Profile
using Distributed

In [19]:
pwd()

"/Users/zifanyu/Documents/GitHub/BulkLMM.jl/analysis/BXD"

In [20]:
cd("..")

In [21]:
## Include the source code of BulkLMM to be tested:
include("../src/scan.jl");
include("../src/lmm.jl");
include("../src/wls.jl");
include("../src/util.jl");
include("../src/kinship.jl");
include("../src/readData.jl");
include("../src/scan_distributed.jl") # new function

## Also include the helper functions for writing tests:
include("../test/testHelper.jl");

In [22]:
## Read in BXD data:
pheno_file = "../data/bxdData/BXDtraits.csv"
pheno = readBXDpheno(pheno_file);
geno_file = "../data/bxdData/BXDgeno_prob.csv"
geno = readGenoProb_ExcludeComplements(geno_file);

kinship = calcKinship(geno); # calculate kinship matrix from genotype data

In [23]:
m = size(pheno, 2);
(n, p) = size(geno); # number of markers

In [24]:
## Consider the 7919-th trait
pheno_y = reshape(pheno[:, 7919], :, 1);

In [25]:
n_seeds = 20;

In [26]:
using StatsBase

In [27]:
seeds_list = StatsBase.sample(1:100, n_seeds; replace = false);

In [110]:
(y0, X0, lambda0) = transform1(pheno_y, geno, kinship);

In [111]:
(r0, X00) = transform2(y0, X0, lambda0; reml = false);

In [32]:
# addprocs(10);

In [33]:
workers()

1-element Vector{Int64}:
 1

In [34]:
# rmprocs(workers())

In [36]:
@everywhere begin
    using Random
    using LinearAlgebra
    using Optim
    using Distributed
    
    ## Include the source code of BulkLMM to be tested:
    include("../src/scan.jl");
    include("../src/lmm.jl");
    include("../src/wls.jl");
    include("../src/util.jl");
    include("../src/kinship.jl");
    include("../src/readData.jl");
    include("../src/scan_distributed.jl")

    include("../src/parallel_helpers.jl")
end

In [37]:
LODs_by_blocks = scan_perms_distributed(pheno_y, geno, kinship; reml = false, nperms = 1000, rndseed = 0, original = true, 
    option = "by blocks", nblocks = 1000);

In [38]:
LODs_ordinary = scan_perms(pheno_y, geno, kinship; reml = false, nperms = 1000, rndseed = 0, original = true);

In [39]:
LODs_by_nperms = scan_perms_distributed(pheno_y, geno, kinship; reml = false, nperms = 1000, original = true, 
    option = "by nperms", ncopies = 10);

In [40]:
LODs_by_blocks[1:6, :]

6×7321 Matrix{Float64}:
 0.00819636  0.00819636  0.00819636  …  0.0449878   0.0128283  0.0128283
 1.13823     1.13823     1.13823        0.972279    1.83125    1.83125
 0.182157    0.182157    0.182157       0.00992902  0.0104772  0.0104772
 0.0430406   0.0430406   0.0430406      0.276304    0.248817   0.248817
 1.02692     1.02692     1.02692        0.305662    0.41929    0.41929
 0.0273182   0.0273182   0.0273182   …  0.258444    0.567742   0.567742

In [41]:
LODs_ordinary[1:6, :]

6×7321 Matrix{Float64}:
 0.00819636  0.00819636  0.00819636  …  0.0449878   0.0128283  0.0128283
 1.13823     1.13823     1.13823        0.972279    1.83125    1.83125
 0.182157    0.182157    0.182157       0.00992902  0.0104772  0.0104772
 0.0430406   0.0430406   0.0430406      0.276304    0.248817   0.248817
 1.02692     1.02692     1.02692        0.305662    0.41929    0.41929
 0.0273182   0.0273182   0.0273182   …  0.258444    0.567742   0.567742

In [42]:
LODs_by_nperms[1:6, :]

6×7321 Matrix{Float64}:
 0.00819636  0.00819636  0.00819636  …  0.0449878  0.0128283    0.0128283
 0.0451503   0.0451503   0.0451503      0.120361   0.276036     0.276036
 0.162684    0.162684    0.162684       0.0296979  0.000749104  0.000749104
 0.012897    0.012897    0.012897       0.0125182  0.186163     0.186163
 0.00751307  0.00751307  0.00751307     0.984605   0.663644     0.663644
 0.207029    0.207029    0.207029    …  0.265689   0.297859     0.297859

In [43]:
maxSqDiff(LODs_by_blocks, LODs_ordinary)

0.0

In [44]:
p

7321

In [45]:
floor(Int, 11/3)

3

In [46]:
11%3

2

In [47]:
7321%150

121

In [51]:
createBlocks(p, ceil(Int, p/150))

150-element Vector{UnitRange{Int64}}:
 1:49
 50:98
 99:147
 148:196
 197:245
 246:294
 295:343
 344:392
 393:441
 442:490
 491:539
 540:588
 589:637
 ⋮
 6763:6811
 6812:6860
 6861:6909
 6910:6958
 6959:7007
 7008:7056
 7057:7105
 7106:7154
 7155:7203
 7204:7252
 7253:7301
 7302:7321

In [52]:
scan_perms_distributed(pheno_y, geno, kinship; reml = false, nperms = 150, rndseed = 0, original = true, 
    option = "by blocks", nblocks = 150)

151×7321 Matrix{Float64}:
 0.00819636   0.00819636   0.00819636   …  0.0128283    0.0128283
 1.13823      1.13823      1.13823         1.83125      1.83125
 0.182157     0.182157     0.182157        0.0104772    0.0104772
 0.0430406    0.0430406    0.0430406       0.248817     0.248817
 1.02692      1.02692      1.02692         0.41929      0.41929
 0.0273182    0.0273182    0.0273182    …  0.567742     0.567742
 0.369665     0.369665     0.369665        0.797462     0.797462
 0.0619086    0.0619086    0.0619086       0.120096     0.120096
 1.50799      1.50799      1.50799         0.124231     0.124231
 0.0214375    0.0214375    0.0214375       0.066094     0.066094
 0.457624     0.457624     0.457624     …  0.122794     0.122794
 0.0674599    0.0674599    0.0674599       0.781827     0.781827
 0.000739424  0.000739424  0.000739424     0.278836     0.278836
 ⋮                                      ⋱               ⋮
 0.0133127    0.0133127    0.0133127       0.178181     0.178181
 0.005

In [53]:
@benchmark scan_perms_distributed(pheno_y, geno, kinship; reml = false, nperms = 150, rndseed = 0, original = true, 
    option = "by blocks", nblocks = 150)

BenchmarkTools.Trial: 17 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m271.183 ms[22m[39m … [35m431.345 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m5.08% … 24.21%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m279.307 ms               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m6.34%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m309.463 ms[22m[39m ± [32m 54.895 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m8.65% ±  5.09%

  [39m▄[39m█[39m [34m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[39m█[39m▆[34

In [54]:
@benchmark scan_perms_distributed(pheno_y, geno, kinship; reml = false, nperms = 1000, rndseed = 0, original = true, 
    option = "by blocks", nblocks = 150)

BenchmarkTools.Trial: 2 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m2.658 s[22m[39m … [35m   3.640 s[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m15.97% … 31.89%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m3.149 s               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m25.17%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m3.149 s[22m[39m ± [32m694.300 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m25.17% ± 11.26%

  [34m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m 
  [34m█[39m[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁

In [59]:
@benchmark scan_perms(pheno_y, geno, kinship; reml = false, nperms = 1000, rndseed = 0, original = true)

BenchmarkTools.Trial: 2 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m3.926 s[22m[39m … [35m   4.127 s[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m44.18% … 47.17%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m4.027 s               [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m45.71%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m4.027 s[22m[39m ± [32m141.632 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m45.71% ±  2.11%

  [34m█[39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m█[39m [39m 
  [34m█[39m[39m▁[39m▁[39m▁[39m▁[39m▁[39m▁

In [58]:
@benchmark scan_null(pheno_y, geno, kinship)

BenchmarkTools.Trial: 190 samples with 1 evaluation.
 Range [90m([39m[36m[1mmin[22m[39m … [35mmax[39m[90m):  [39m[36m[1m19.615 ms[22m[39m … [35m62.028 ms[39m  [90m┊[39m GC [90m([39mmin … max[90m): [39m 0.00% … 67.26%
 Time  [90m([39m[34m[1mmedian[22m[39m[90m):     [39m[34m[1m21.172 ms              [22m[39m[90m┊[39m GC [90m([39mmedian[90m):    [39m 0.00%
 Time  [90m([39m[32m[1mmean[22m[39m ± [32mσ[39m[90m):   [39m[32m[1m26.572 ms[22m[39m ± [32m12.787 ms[39m  [90m┊[39m GC [90m([39mmean ± σ[90m):  [39m20.85% ± 22.94%

  [39m▁[39m▄[34m█[39m[39m [39m [39m [39m [39m [39m [39m [32m [39m[39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m [39m 
  [39m█[39m█[34m█[39m[39m█[39m▃

In [60]:
m

35556

In [64]:
?transform_permute

search: [0m[1mt[22m[0m[1mr[22m[0m[1ma[22m[0m[1mn[22m[0m[1ms[22m[0m[1mf[22m[0m[1mo[22m[0m[1mr[22m[0m[1mm[22m[0m[1m_[22m[0m[1mp[22m[0m[1me[22m[0m[1mr[22m[0m[1mm[22m[0m[1mu[22m[0m[1mt[22m[0m[1me[22m [0m[1mt[22m[0m[1mr[22m[0m[1ma[22m[0m[1mn[22m[0m[1ms[22m[0m[1mf[22m[0m[1mo[22m[0m[1mr[22m[0m[1mm[22m[0m[1m_[22mbxd_[0m[1mp[22mh[0m[1me[22mno_to_gemma



No documentation found.

`transform_permute` is a `Function`.

```
# 1 method for generic function "transform_permute":
[1] transform_permute(r0::Matrix{Float64}; nperms, rndseed, original) in Main at /Users/zifanyu/Documents/GitHub/BulkLMM.jl/src/parallel_helpers.jl:175
```


In [80]:
test_perm = transform_permute(r0; nperms = 10, rndseed = 0, original = true)

79×11 Matrix{Float64}:
 -0.42257    -0.399306   0.106865   …  -0.528949    0.54331     0.430699
 -1.48199     0.200302  -0.327519       0.207851   -1.44553    -0.220243
 -1.44553     0.111675  -0.699129       0.511444   -0.119937   -0.137428
 -0.699129    0.812863  -0.131841      -0.220243    0.768221    1.39413
  1.27625     1.12818    0.31017       -0.137428    1.27625     0.404775
  0.425936   -0.220243  -1.27416    …   0.430699    0.511444    0.156672
  0.365797   -0.463753   0.768221      -0.641664    0.574367    1.27625
 -0.0704027   0.746313   0.365797       0.517884    0.517884   -0.399306
 -0.703862   -0.288931  -0.0414604     -0.617305    0.746313   -0.699129
 -0.653201    0.101631   0.746313      -0.0543167  -0.137428    0.365797
 -0.0971895  -0.580751  -1.13838    …   0.746313   -0.0414604  -1.48199
  0.225635    0.54331   -0.883433       0.365797    0.365797    0.656254
  0.171204    0.106865  -1.44553        0.106865    0.430699   -0.313615
  ⋮                            

In [73]:
test_univar = scan_perms_distributed(pheno_y, geno, kinship; reml = false, nperms = 10, rndseed = 0, original = true, 
    option = "by blocks", nblocks = 150)

11×7321 Matrix{Float64}:
 0.00819636  0.00819636  0.00819636  …  0.0449878   0.0128283  0.0128283
 1.13823     1.13823     1.13823        0.972279    1.83125    1.83125
 0.182157    0.182157    0.182157       0.00992902  0.0104772  0.0104772
 0.0430406   0.0430406   0.0430406      0.276304    0.248817   0.248817
 1.02692     1.02692     1.02692        0.305662    0.41929    0.41929
 0.0273182   0.0273182   0.0273182   …  0.258444    0.567742   0.567742
 0.369665    0.369665    0.369665       0.383784    0.797462   0.797462
 0.0619086   0.0619086   0.0619086      0.0428826   0.120096   0.120096
 1.50799     1.50799     1.50799        0.0231452   0.124231   0.124231
 0.0214375   0.0214375   0.0214375      0.0567621   0.066094   0.066094
 0.457624    0.457624    0.457624    …  7.60606e-5  0.122794   0.122794

In [75]:
scan_perms(pheno_y, geno, kinship; nperms = 10, rndseed = 0, original = true)

11×7321 Matrix{Float64}:
 0.00819636  0.00819636  0.00819636  …  0.0449878   0.0128283  0.0128283
 1.13823     1.13823     1.13823        0.972279    1.83125    1.83125
 0.182157    0.182157    0.182157       0.00992902  0.0104772  0.0104772
 0.0430406   0.0430406   0.0430406      0.276304    0.248817   0.248817
 1.02692     1.02692     1.02692        0.305662    0.41929    0.41929
 0.0273182   0.0273182   0.0273182   …  0.258444    0.567742   0.567742
 0.369665    0.369665    0.369665       0.383784    0.797462   0.797462
 0.0619086   0.0619086   0.0619086      0.0428826   0.120096   0.120096
 1.50799     1.50799     1.50799        0.0231452   0.124231   0.124231
 0.0214375   0.0214375   0.0214375      0.0567621   0.066094   0.066094
 0.457624    0.457624    0.457624    …  7.60606e-5  0.122794   0.122794

In [77]:
X00

79×7321 Matrix{Float64}:
  0.285431     0.285431     0.285431    …  -0.263737    -0.263737
  0.831539     0.831539     0.831539       -0.0785915   -0.0785915
 -1.66534     -1.66534     -1.66534        -0.200825    -0.200825
  0.108772     0.108772     0.108772        0.595318     0.595318
  0.303485     0.303485     0.303485       -0.0773292   -0.0773292
  0.120631     0.120631     0.120631    …   0.0323032    0.0323032
  0.120643     0.120643     0.120643       -0.212935    -0.212935
  0.165916     0.165916     0.165916        0.477606     0.477606
 -0.00450792  -0.00450792  -0.00450792     -1.36891     -1.36891
 -0.447304    -0.447304    -0.447304       -0.229852    -0.229852
 -0.0323028   -0.0323028   -0.0323028   …  -0.43835     -0.43835
  0.18168      0.18168      0.18168        -0.578258    -0.578258
 -0.438445    -0.438445    -0.438445       -0.66382     -0.66382
  ⋮                                     ⋱                ⋮
  1.09159      1.09159      1.09159         0.433778     0

In [119]:
# colStandardize!(X00)

In [120]:
# colStandardize!(test_perm)

In [129]:
r2lod(cor(test_perm[:, 1], X00[:, 1]), n)

-0.014339433331806976

In [128]:
function r2lod(r::Float64, n::Int64)
    
    return n/2 * log10(1-r^2)
    
end

r2lod (generic function with 1 method)

In [125]:
test_r2 = transpose(test_perm)*X00

11×7321 Matrix{Float64}:
  -2.25465   -2.25465   -2.25465  …   -6.5618     -4.58552   -4.58552
  19.4492    19.4492    19.4492       16.5071     23.2671    23.2671
  -8.63559   -8.63559   -8.63559      -4.37346    -4.37342   -4.37342
  -4.47862   -4.47862   -4.47862       7.76623     7.29101    7.29101
 -19.5399   -19.5399   -19.5399        8.28981    10.1436    10.1436
  -3.67677   -3.67677   -3.67677  …  -12.2989    -16.9253   -16.9253
  10.9827    10.9827    10.9827      -14.4337    -19.561    -19.561
   4.20183    4.20183    4.20183      -6.46415    -9.11429   -9.11429
 -23.407    -23.407    -23.407         0.530096    4.46546    4.46546
   2.25563    2.25563    2.25563      -7.06999    -7.3802    -7.3802
  12.2631    12.2631    12.2631   …   -2.6025      4.4259     4.4259

In [122]:
test_r2 .^2

11×7321 Matrix{Float64}:
   5.08345    5.08345    5.08345  …   43.0572     21.027    21.027
 378.272    378.272    378.272       272.486     541.358   541.358
  74.5735    74.5735    74.5735       19.1271     19.1268   19.1268
  20.058     20.058     20.058        60.3143     53.1589   53.1589
 381.806    381.806    381.806        68.7209    102.894   102.894
  13.5186    13.5186    13.5186   …  151.262     286.464   286.464
 120.619    120.619    120.619       208.333     382.634   382.634
  17.6554    17.6554    17.6554       41.7853     83.0702   83.0702
 547.886    547.886    547.886         0.281002   19.9403   19.9403
   5.08786    5.08786    5.08786      49.9847     54.4674   54.4674
 150.384    150.384    150.384    …    6.77298    19.5886   19.5886

In [81]:
n

79

In [93]:
test_lods_liteQTL = mapslices(x -> n/2 * log10.(1 .- x.^2), test_r2; dims = 1)

LoadError: DomainError with -71.98265731417176:
log10 will only return a complex result if called with a complex argument. Try log10(Complex(x)).

In [82]:
?map

search: [0m[1mm[22m[0m[1ma[22m[0m[1mp[22m [0m[1mm[22m[0m[1ma[22m[0m[1mp[22m! [0m[1mm[22m[0m[1ma[22m[0m[1mp[22mc [0m[1mm[22m[0m[1ma[22m[0m[1mp[22mcols [0m[1mm[22m[0m[1ma[22m[0m[1mp[22mfoldr [0m[1mm[22m[0m[1ma[22m[0m[1mp[22mfoldl [0m[1mm[22m[0m[1ma[22m[0m[1mp[22mcols! [0m[1mm[22m[0m[1ma[22m[0m[1mp[22mslices [0m[1mm[22m[0m[1ma[22m[0m[1mp[22mreduce



```
map(f, c...) -> collection
```

Transform collection `c` by applying `f` to each element. For multiple collection arguments, apply `f` elementwise, and stop when when any of them is exhausted.

See also [`map!`](@ref), [`foreach`](@ref), [`mapreduce`](@ref), [`mapslices`](@ref), [`zip`](@ref), [`Iterators.map`](@ref).

# Examples

```jldoctest
julia> map(x -> x * 2, [1, 2, 3])
3-element Vector{Int64}:
 2
 4
 6

julia> map(+, [1, 2, 3], [10, 20, 30, 400, 5000])
3-element Vector{Int64}:
 11
 22
 33
```

---

```
map(f, x::PooledArray; pure::Bool=false)
```

Transform `PooledArray` `x` by applying `f` to each element.

If `pure=true` then `f` is applied to each element of pool of `x` exactly once (even if some elements in pool are not present it `x`). This will typically be much faster when the proportion of unique values in `x` is small.

If `pure=false`, the returned array will use the same reference type as `x`, or `Int` if the number of unique values in the result is too large to fit in that type.
