In [1]:
using CSV, DelimitedFiles, DataFrames, Missings, XLSX
using LinearAlgebra, Statistics, Optim
using Random, Distributions, LoopVectorization
using BenchmarkTools

In [2]:
using Plots

In [123]:
local_path = "../../BulkLMM.jl/src";

In [138]:
include(joinpath(local_path, "kinship.jl"));
include(joinpath(local_path, "util.jl"));
include(joinpath(local_path, "wls.jl"));
include(joinpath(local_path, "lmm.jl"));
include(joinpath(local_path, "gridbrent.jl"));
include(joinpath(local_path, "transform_helpers.jl"));
include(joinpath(local_path, "scan.jl"));
include(joinpath(local_path, "bulkscan_helpers.jl"));
include(joinpath(local_path, "bulkscan.jl"));
include(joinpath(local_path, "readData.jl"));
include(joinpath(local_path, "../plot_utils/visuals_utils.jl"));
include(joinpath(local_path, "analysis_helpers/single_trait_analysis.jl"));
include(joinpath(local_path, "../test/testHelpers.jl"));

In [6]:
bulklmmdir = local_path;
pheno_file = joinpath(bulklmmdir,"..","data/bxdData/spleen-pheno-nomissing.csv");
pheno = readdlm(pheno_file, ',', header = false);
pheno_processed = pheno[2:end, 2:(end-1)].*1.0; # exclude the header, the first (transcript ID)and the last columns (sex)

In [7]:
geno_file = joinpath(bulklmmdir,"..","data/bxdData/spleen-bxd-genoprob.csv");
geno = readdlm(geno_file, ',', header = false);
geno_processed = geno[2:end, 1:2:end] .* 1.0;

In [15]:
(n, m) = size(pheno_processed);

In [17]:
p = size(geno_processed, 2);

In [9]:
@time kinship = calcKinship(geno_processed); # calculate K

  0.236701 seconds (30.81 k allocations: 862.019 MiB, 7.51% gc time)


In [69]:
@time (Y0, X0, lambda0) = transform_rotation(pheno_processed, geno_processed, kinship);

  0.017453 seconds (29 allocations: 30.429 MiB)


In [70]:
size(X0) # check that n#col = p + 1

(79, 7322)

In [71]:
@time test1_L = weighted_liteqtl(Y0, X0, lambda0, test_grid[1]);

  1.446959 seconds (468.81 k allocations: 2.056 GiB, 38.61% gc time)


In [72]:
@time test1_logLR = test1_L .* log(10)

  0.628405 seconds (4 allocations: 1.939 GiB, 1.56% gc time)


7321×35554 Matrix{Float64}:
 0.00027651   0.108551   0.00682573  …  0.0704603  0.00771174  0.802891
 0.00027651   0.108551   0.00682573     0.0704603  0.00771174  0.802891
 0.00027651   0.108551   0.00682573     0.0704603  0.00771174  0.802891
 0.00027651   0.108551   0.00682573     0.0704603  0.00771174  0.802891
 0.00027651   0.108551   0.00682573     0.0704603  0.00771174  0.802891
 0.00027651   0.108551   0.00682573  …  0.0704603  0.00771174  0.802891
 0.00027651   0.108551   0.00682573     0.0704603  0.00771174  0.802891
 0.00027651   0.108551   0.00682573     0.0704603  0.00771174  0.802891
 0.00027651   0.108551   0.00682573     0.0704603  0.00771174  0.802891
 0.00027651   0.108551   0.00682573     0.0704603  0.00771174  0.802891
 0.000276503  0.108551   0.00682576  …  0.0704603  0.00771174  0.802892
 0.000272851  0.108586   0.00683966     0.0704901  0.00771303  0.803004
 0.0219236    0.152786   0.0413911      0.112449   0.00887184  0.889538
 ⋮                                  

In [73]:
weights1 = makeweights(test_grid[1], lambda0);

In [74]:
prior = [0.0, 0.0];

In [75]:
@time test1_logL0 = wls_multivar(Y0, reshape(X0[:, 1], :, 1), weights1, prior).Ell;

  0.185161 seconds (425.21 k allocations: 121.507 MiB, 60.71% gc time)


In [76]:
@time test1_logL1 = test1_logLR .+ repeat(test1_logL0, 7321)

  3.462887 seconds (6 allocations: 3.879 GiB, 14.36% gc time)


7321×35554 Matrix{Float64}:
 97.3425  74.9454  72.8573  66.6923  …  92.201   112.126  142.004  145.839
 97.3425  74.9454  72.8573  66.6923     92.201   112.126  142.004  145.839
 97.3425  74.9454  72.8573  66.6923     92.201   112.126  142.004  145.839
 97.3425  74.9454  72.8573  66.6923     92.201   112.126  142.004  145.839
 97.3425  74.9454  72.8573  66.6923     92.201   112.126  142.004  145.839
 97.3425  74.9454  72.8573  66.6923  …  92.201   112.126  142.004  145.839
 97.3425  74.9454  72.8573  66.6923     92.201   112.126  142.004  145.839
 97.3425  74.9454  72.8573  66.6923     92.201   112.126  142.004  145.839
 97.3425  74.9454  72.8573  66.6923     92.201   112.126  142.004  145.839
 97.3425  74.9454  72.8573  66.6923     92.201   112.126  142.004  145.839
 97.3425  74.9454  72.8573  66.6923  …  92.201   112.126  142.004  145.839
 97.3425  74.9454  72.8573  66.6923     92.201   112.126  142.004  145.839
 97.3641  74.9896  72.8919  66.666      92.1663  112.168  142.005  145.9

In [77]:
test_grid = [0.0, 0.50, 0.95]

3-element Vector{Float64}:
 0.0
 0.5
 0.95

In [78]:
test_grid = collect(0.0:0.05:0.95)

20-element Vector{Float64}:
 0.0
 0.05
 0.1
 0.15
 0.2
 0.25
 0.3
 0.35
 0.4
 0.45
 0.5
 0.55
 0.6
 0.65
 0.7
 0.75
 0.8
 0.85
 0.9
 0.95

In [79]:
BLAS.get_num_threads()

4

In [80]:
BLAS.set_num_threads(4)

In [81]:
test_h2_panel = ones(p, m) .* test_grid[1];
test_h2_panel_counter = Int.(ones(p, m));

@time begin
    for h2 in test_grid
        testk_logLR = weighted_liteqtl(Y0, X0, lambda0, h2) .* log(10);
        weightsk = makeweights(h2, lambda0);
        testk_logL0 = wls_multivar(Y0, reshape(X0[:, 1], :, 1), weightsk, [0.0, 0.0]).Ell; 
        testk_logL1 = testk_logLR .+ repeat(testk_logL0, 7321)

        tmax!(test1_logL1, testk_logL1, test_h2_panel, test_h2_panel_counter, test_grid)
    end
end

126.520757 seconds (17.88 M allocations: 159.847 GiB, 18.53% gc time)


In [107]:
test_all_logL0 = zeros(20, m);
test_all_logL0[1, :] = test1_logL0;

In [108]:
test1_logL0

1×35554 Matrix{Float64}:
 97.3422  74.8368  72.8505  66.6552  …  91.6516  112.055  141.996  145.036

In [110]:
@time begin 
    
    for k in 2:20
        h2 = test_grid[k];
        weightsk = makeweights(h2, lambda0);
        testk_logL0 = wls_multivar(Y0, reshape(X0[:, 1], :, 1), weightsk, [0.0, 0.0]).Ell; 
        test_all_logL0[k, :] = testk_logL0;
    end
    
end

  1.161604 seconds (8.08 M allocations: 2.255 GiB, 11.55% gc time)


In [113]:
test_all_logL0

20×35554 Matrix{Float64}:
 97.3422  74.8368  72.8505  66.6552  …  91.6516  112.055  141.996  145.036
 96.5777  74.2778  72.1035  65.9431     91.156   111.678  141.417  144.349
 96.0902  73.9975  71.6349  65.5108     90.9283  111.595  141.125  143.924
 95.6735  73.7915  71.2388  65.1522     90.7642  111.601  140.913  143.555
 95.2815  73.6148  70.8688  64.8212     90.6196  111.651  140.736  143.194
 94.8945  73.4496  70.5056  64.4985  …  90.4766  111.727  140.575  142.821
 94.5014  73.2859  70.1381  64.1732     90.3258  111.82   140.42   142.423
 94.0939  73.1175  69.7582  63.8373     90.1607  111.924  140.263  141.992
 93.6647  72.939   69.3589  63.4841     89.976   112.035  140.098  141.516
 93.2065  72.7455  68.9331  63.1067     89.7667  112.147  139.92   140.987
 92.7112  72.5317  68.473   62.6974  …  89.527   112.256  139.722  140.393
 92.1687  72.2913  67.9689  62.2472     89.2502  112.356  139.497  139.719
 91.566   72.0165  67.4084  61.7443     88.9276  112.439  139.237  138.946

In [117]:
@time test_logL0_optimum = mapslices(x -> maximum(x), test_all_logL0, dims = 1) |> x -> repeat(x, 7321);

  2.377470 seconds (552.92 k allocations: 1.957 GiB, 0.33% gc time, 6.20% compilation time)


In [119]:
results_alt = (test1_logL1 .- test_logL0_optimum) ./ log(10);

In [82]:
test_h2_panel

7321×35554 Matrix{Float64}:
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.15  0.0  0.0  0.0  0.35  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.15  0.0  0.0  0.0  0.35  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.15  0.0  0.0  0.0  0.35  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.15  0.0  0.0  0.0  0.35  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.15  0.0  0.0  0.0  0.35  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.15  0.0  0.0  0.0  0.35  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.15  0.0  0.0  0.0  0.35  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.15  0.0  0.0  0.0  0.35  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.15  0.0  0.0  0.0  0.35  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.15  0.0  0.0  0.0  0.35  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0  …  0.15  0.0  0.0  0.0  0.35  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.15  0.0  0.0  0.0  0.35  0.0  0.0
 0.0  0.0  0.0  0.0  0.0  0.0  0.0     0.15  0.0  0.0  0.0  0.3   0.0  0

In [86]:
BLAS.get_num_threads()

4

In [91]:
BLAS.set_num_threads(8)

In [92]:
@time test_bulkscan_null_grid = bulkscan_null_grid(pheno_processed, geno_processed, kinship, test_grid);

  6.327137 seconds (10.61 M allocations: 6.788 GiB, 22.77% gc time)


In [121]:
mean(abs.(results_alt .- test_bulkscan_null_grid.L))

0.003190289481501103

In [143]:
grid_finer = collect(0.0:0.01:0.99)

100-element Vector{Float64}:
 0.0
 0.01
 0.02
 0.03
 0.04
 0.05
 0.06
 0.07
 0.08
 0.09
 0.1
 0.11
 0.12
 ⋮
 0.88
 0.89
 0.9
 0.91
 0.92
 0.93
 0.94
 0.95
 0.96
 0.97
 0.98
 0.99

In [None]:
@time test_bulkscan_alt_grid = bulkscan_alt_grid(pheno_processed, geno_processed, kinship, grid_finer);

In [None]:
@time test_scan_alt = scan(reshape(pheno_processed[:, 1112], :, 1), geno_processed, kinship; assumption = "alt", optim_interval = 4);

In [None]:
hcat(test_scan_alt.h2_each_marker, test_h2_panel[:, 1112], test_bulkscan_alt_grid.h2_panel[:, 1112])

In [147]:
hcat(test_scan_alt.lod, test_bulkscan_alt_grid.L[:, 1112], results_alt[:, 1112])

7321×3 Matrix{Float64}:
 0.474433   0.377639     0.377639
 0.474433   0.377639     0.377639
 0.474433   0.377639     0.377639
 0.474433   0.377639     0.377639
 0.474433   0.377639     0.377639
 0.474433   0.377639     0.377639
 0.474433   0.377639     0.377639
 0.474433   0.377639     0.377639
 0.474433   0.377639     0.377639
 0.474433   0.377639     0.377639
 0.474433   0.377639     0.377639
 0.474625   0.377806     0.377806
 0.718646   0.573483     0.573483
 ⋮                       
 0.0318565  0.000183332  0.000183332
 0.0318565  0.000183327  0.000183327
 0.0318566  0.000183327  0.000183327
 0.0227071  0.0123942    0.0123942
 0.093469   0.0797131    0.0797131
 0.093469   0.0797131    0.0797131
 0.398281   0.427481     0.427481
 0.400334   0.433584     0.433584
 0.40036    0.433594     0.433594
 0.38686    0.422936     0.422936
 0.634617   0.754686     0.754686
 0.634617   0.754686     0.754686

In [148]:
mean(abs.(test_scan_alt.lod .- test_bulkscan_alt_grid.L[:, 1112]))

0.05908721531033275