## Data Analysis using BulkLMM - BXD Spleen Study

In [1]:
using CSV, DelimitedFiles, DataFrames, Missings, XLSX
using LinearAlgebra, Statistics, Optim
using Random, Distributions, LoopVectorization
using GeneNetworkAPI, Downloads
using BenchmarkTools

In [2]:
using Plots

In [3]:
# Relative path to the BulkLMM.jl `src` directory; the include and data paths in later cells are built from it.
local_path = "../../BulkLMM.jl/src";

In [33]:
# Load the BulkLMM source files, the plotting utilities and the analysis helpers
# directly from the local checkout. Files are included in the order listed.
for src_file in (
    "kinship.jl",
    "util.jl",
    "wls.jl",
    "lmm.jl",
    "gridbrent.jl",
    "transform_helpers.jl",
    "scan.jl",
    "bulkscan_helpers.jl",
    "bulkscan.jl",
    "readData.jl",
    "../plot_utils/visuals_utils.jl",
    "analysis_helpers/single_trait_analysis.jl",
)
    include(joinpath(local_path, src_file))
end

In [64]:
# Also load the helper definitions kept in the package's test directory.
include(joinpath(local_path, "../test/testHelpers.jl"));

### Load data:

In [4]:
# Read the spleen phenotype CSV as a raw matrix; `header = false` keeps the header row as the first data row.
bulklmmdir = local_path;
pheno_file = joinpath(bulklmmdir,"..","data/bxdData/spleen-pheno-nomissing.csv");
pheno = readdlm(pheno_file, ',', header = false);
# Drop the first row and the first and last columns, then multiply elementwise by 1.0
# (presumably to obtain a Float64 matrix — TODO confirm the element type read from the file).
pheno_processed = pheno[2:end, 2:(end-1)].*1.0; # exclude the header, the first column (transcript ID) and the last column (sex) — column meanings per the original note, TODO confirm

In [5]:
# Read the genotype-probability CSV as a raw matrix; `header = false` keeps the header row as the first data row.
geno_file = joinpath(bulklmmdir,"..","data/bxdData/spleen-bxd-genoprob.csv");
geno = readdlm(geno_file, ',', header = false);
# Drop the first row and keep every other column starting from the first (1, 3, 5, ...).
# NOTE(review): presumably the file stores two genotype probabilities per marker and only one is needed — confirm.
geno_processed = geno[2:end, 1:2:end] .* 1.0;

In [6]:
size(pheno_processed) # (number of strains, number of traits)

(79, 35554)

In [7]:
size(geno_processed) # (number of strains, number of markers)

(79, 7321)

In [11]:
# Time the kinship computation from the genotype matrix; `kinship` is passed to every scan below.
@time kinship = calcKinship(geno_processed); # calculate K

  0.306172 seconds (30.81 k allocations: 862.019 MiB, 8.70% gc time)


### Single trait scans:

In [34]:
# Pick one trait by column index and keep it as a one-column matrix for the
# single-trait scans (a range index preserves the second dimension).
traitID = 1112;
pheno_y = pheno_processed[:, traitID:traitID];

In [35]:
# Time a single-trait scan of the selected trait against all genotype columns.
# The recorded timing is dominated by compilation (first call).
@time single_results = scan(pheno_y, geno_processed, kinship);

  1.109311 seconds (1.37 M allocations: 110.635 MiB, 94.14% compilation time)


In [36]:
# Same scan with permutation testing enabled, using 1000 permutations.
# NOTE(review): `original = false` presumably leaves the un-permuted result out of the output — confirm against `scan`.
# NOTE(review): no RNG seed is set in this notebook, so the permutation results may differ between runs.
@time single_results_perms = scan(pheno_y, geno_processed, kinship; permutation_test = true, nperms = 1000, original = false);

  0.291795 seconds (456.68 k allocations: 108.309 MiB, 76.60% compilation time)


In [37]:
# Extract the `thrs` field of the thresholds computed from the permutation results at levels 0.85 and 0.95.
# NOTE(review): presumably these are quantiles of the per-permutation maxima — confirm against `get_thresholds`.
thrs = get_thresholds(single_results_perms, [0.85, 0.95]).thrs

2-element Vector{Float64}:
 3.1486274394173113
 3.7208709479810396

### Multiple trait scans:

In [38]:
Threads.nthreads()

16

In [74]:
BLAS.get_num_threads()

1

In [40]:
# Request 4 BLAS threads.
# NOTE(review): the `BLAS.get_num_threads()` outputs recorded in this notebook (cells 32 and 74) show 1,
# and cells were executed out of order, so confirm which setting was active for each timing.
BLAS.set_num_threads(4)

In [69]:
# Fine grid of candidate h2 values: 0.00, 0.01, ..., 0.99 (100 points).
h2_grid = collect(range(0.0, 0.99; step = 0.01));

In [70]:
# Coarser grid of candidate h2 values: 0.00, 0.05, ..., 0.95 (20 points).
h2_grid2 = collect(range(0.0, 0.95; step = 0.05));

In [72]:
# Column-standardised copy of the phenotype matrix, used for the `_st` scans below.
# NOTE(review): presumably each trait column is centred and scaled — confirm against `colStandardize`.
pheno_st = colStandardize(pheno_processed);

In [53]:
# Benchmark the grid-based bulk scan over all traits with the fine h2 grid.
# The arguments are non-constant globals, so they are interpolated with `$`
# as the BenchmarkTools manual recommends; otherwise the measurement also
# includes the overhead of accessing untyped globals.
@benchmark bulkscan_null_grid($pheno_processed, $geno_processed, $kinship, $h2_grid)

BenchmarkTools.Trial: 1 sample with 1 evaluation.
 Single result which took 12.133 s (27.80% GC) to evaluate,
 with a memory estimate of 17.66 GiB, over 49220879 allocations.

In [71]:
# Benchmark the grid-based bulk scan over all traits with the coarse h2 grid.
# The arguments are non-constant globals, so they are interpolated with `$`
# as the BenchmarkTools manual recommends; otherwise the measurement also
# includes the overhead of accessing untyped globals.
@benchmark bulkscan_null_grid($pheno_processed, $geno_processed, $kinship, $h2_grid2)

BenchmarkTools.Trial: 1 sample with 1 evaluation.
 Single result which took 5.106 s (18.81% GC) to evaluate,
 with a memory estimate of 6.79 GiB, over 10610121 allocations.

In [43]:
# Grid-based bulk scan of all traits on the raw phenotypes with the fine h2 grid; results are kept for the comparisons below.
@time all_results_grid = bulkscan_null_grid(pheno_processed, geno_processed, kinship, h2_grid);

 11.350480 seconds (49.22 M allocations: 17.664 GiB, 27.72% gc time)


In [73]:
# Same grid-based bulk scan, run on the column-standardised phenotypes.
@time all_results_grid_st = bulkscan_null_grid(pheno_st, geno_processed, kinship, h2_grid);

 12.768860 seconds (49.22 M allocations: 17.664 GiB, 31.70% gc time)


In [59]:
# Bulk scan of all traits on the raw phenotypes using `bulkscan_null` (the "exact" counterpart of the grid scan).
# NOTE(review): `optim_interval = 10` presumably controls how the h2 search range is subdivided for optimisation — confirm against `bulkscan_null`.
@time all_results_exact = bulkscan_null(pheno_processed, geno_processed, kinship;
                                        optim_interval = 10);

231.826512 seconds (3.09 G allocations: 793.932 GiB, 26.78% gc time, 0.05% compilation time)


In [75]:
# Same `bulkscan_null` run on the column-standardised phenotypes.
# NOTE(review): `optim_interval = 10` presumably controls how the h2 search range is subdivided for optimisation — confirm against `bulkscan_null`.
@time all_results_exact_st = bulkscan_null(pheno_st, geno_processed, kinship;
                                        optim_interval = 10);

233.371882 seconds (3.09 G allocations: 793.951 GiB, 26.51% gc time)


In [76]:
all_results_grid_st.L[:, 1]

7321-element Vector{Float64}:
 0.00012008682100828602
 0.00012008682100828602
 0.00012008682100828602
 0.00012008682100828602
 0.00012008682100828602
 0.00012008682100828602
 0.00012008682100828602
 0.00012008682100828602
 0.00012008682099685866
 0.00012008682099685866
 0.00012008387637556666
 0.00011849771856266301
 0.009521311901905454
 ⋮
 0.26017848855422565
 0.26017851503361494
 0.2601785150336246
 0.25344638801937
 0.378289358307625
 0.37828935830772425
 0.2193513999155637
 0.20144120787678782
 0.20153298758762725
 0.177823392806062
 0.18170671563926058
 0.18170671563924903

In [77]:
all_results_exact_st.L[:, 1]

7321-element Vector{Float64}:
 0.00012008682100828602
 0.00012008682100828602
 0.00012008682100828602
 0.00012008682100828602
 0.00012008682100828602
 0.00012008682100828602
 0.00012008682100828602
 0.00012008682100828602
 0.00012008682099685866
 0.00012008682099685866
 0.00012008387637556666
 0.00011849771856266301
 0.009521311901905454
 ⋮
 0.26017848855422565
 0.26017851503361494
 0.2601785150336246
 0.25344638801937
 0.378289358307625
 0.37828935830772425
 0.2193513999155637
 0.20144120787678782
 0.20153298758762725
 0.177823392806062
 0.18170671563926058
 0.18170671563924903

In [60]:
unique(all_results_grid.h2_null_list)

74-element Vector{Float64}:
 0.0
 0.78
 0.45
 0.38
 0.84
 0.61
 0.7
 0.52
 0.89
 0.5
 0.79
 0.63
 0.47
 ⋮
 0.31
 0.83
 0.28
 0.34
 0.35
 0.98
 0.86
 0.39
 0.81
 0.33
 0.29
 0.27

In [61]:
# Side-by-side view of the `h2_null_list` values: `bulkscan_null` in column 1, grid scan in column 2.
hcat(all_results_exact.h2_null_list, all_results_grid.h2_null_list)

35554×2 Matrix{Float64}:
 4.21494e-15  0.0
 1.22271e-15  0.0
 6.14951e-16  0.0
 1.60996e-15  0.0
 3.20109e-15  0.0
 9.95012e-16  0.0
 9.42488e-16  0.0
 9.95012e-16  0.0
 8.50543e-16  0.0
 1.00066e-15  0.0
 5.8249e-16   0.0
 3.18791e-15  0.0
 2.10747e-15  0.0
 ⋮            
 2.03871e-15  0.0
 6.81991e-15  0.0
 0.655454     0.66
 5.8249e-16   0.0
 1.60996e-15  0.0
 0.474575     0.47
 9.42488e-16  0.0
 9.42488e-16  0.0
 6.81991e-15  0.0
 0.682617     0.68
 1.60996e-15  0.0
 1.59804e-15  0.0

In [62]:
# Largest absolute difference between the two `h2_null_list` vectors, returned together with its index.
findmax(abs.(all_results_exact.h2_null_list .- all_results_grid.h2_null_list))

(0.009999981682421333, 1634)

In [63]:
all_results_exact.h2_null_list[1634], all_results_grid.h2_null_list[1634]

(0.9999999816824213, 0.99)

In [67]:
# Largest absolute elementwise difference between the two `L` result matrices, with its (row, column) index.
# NOTE(review): rows presumably index markers and columns index traits — confirm against the scan output layout.
findmax(abs.(all_results_exact.L .- all_results_grid.L))

(0.9779549482273531, CartesianIndex(4023, 32346))

In [68]:
# Compare the two h2 values at index 32346, the column index reported by the `findmax` over the `L` differences.
all_results_exact.h2_null_list[32346], all_results_grid.h2_null_list[32346]

(0.9999999816824213, 0.99)

In [32]:
BLAS.get_num_threads()

1