In [1]:
## using Pkg
## Pkg.add("PlotlyJS")

In [2]:
using DelimitedFiles
using LinearAlgebra
using Optim
using Distributions
using Test
using BenchmarkTools
using Random
using CSV
using DataFrames
using Distributed
using Plots

In [3]:
## Show the current working directory. The cells below refer to source and data
## files through relative "../" paths, and the recorded output shows the session
## running from the repository's `analysis` directory.
pwd()

"/Users/FredYu/Documents/GitHub/BulkLMM.jl/analysis"

In [4]:
## Load the BulkLMM source files under test, followed by the helper functions
## used for writing tests. Files are included one at a time, in the order listed.
foreach(include, [
    "../src/scan.jl",
    "../src/lmm.jl",
    "../src/wls.jl",
    "../src/util.jl",
    "../src/kinship.jl",
    "../src/readData.jl",
    "../test/testHelper.jl",  # test helpers
]);

In [5]:
## Locations of the BXD trait table and genotype-probability table:
pheno_file = "../data/bxdData/BXDtraits.csv"
geno_file = "../data/bxdData/BXDgeno_prob.csv"

## Read both tables with the project's readers, then compute the kinship matrix
## from the genotype data:
pheno = readBXDpheno(pheno_file);
geno = readGenoProb_ExcludeComplements(geno_file);
kinship = calcKinship(geno);

In [6]:
nperms = 1024; # number of permuted copies required
m = size(pheno, 2); # number of traits (columns of pheno)
p = size(geno, 2); # number of markers (columns of geno)

## Run `scan` with REML on every trait, one column at a time, and keep the second
## element of each result (presumably the heritability estimate — confirm against
## the definition of `scan`):
heritabilities = Float64[
    scan(pheno[:, j:j], geno, kinship; reml = true)[2] for j in 1:m
];

histogram(heritabilities[1:7000], bins = 100)

length(heritabilities)

plot(heritabilities)

findmax(heritabilities)

In [7]:
## Work with trait number 7919, kept as a one-column matrix:
pheno_y = pheno[:, 7919:7919];

In [8]:
## Include scan.jl a second time (it was already included above).
## NOTE(review): presumably this reloads edits made to the file during the session — confirm.
include("../src/scan.jl");

In [9]:
## Time the permutation scan with 100 permutations. The global inputs are
## interpolated with `$` so the measurement does not include the overhead of
## accessing non-constant globals.
@btime scan_perms($pheno_y, $geno, $kinship; nperms = 100);

  876.718 ms (183990 allocations: 1.37 GiB)


In [None]:
## Time the permutation scan with 1024 permutations. The global inputs are
## interpolated with `$` so the measurement does not include the overhead of
## accessing non-constant globals.
@btime scan_perms($pheno_y, $geno, $kinship; nperms = 1024);

In [None]:
## Run the permutation scan for the selected trait and keep its result; every
## keyword argument of scan_perms is left at its default here.
results_nperms = scan_perms(pheno_y, geno, kinship); # result is reused by the cells below

In [None]:
## Dimensions of the permutation-scan result:
size(results_nperms)

## Do the same for many traits (first 1000):

In [None]:
"""
    getMaxLod_manyTraits(pheno, geno, kinship; ntraits = 1000, nperms = 1024, original = true)

Run `scan_perms` on each of the first `ntraits` columns of `pheno` and collect the
row maxima of every scan result (computed by `getMaxLod`) into one matrix.

# Arguments
- `pheno`: matrix whose columns are processed one at a time; column `k` is passed to
  `scan_perms` as a one-column matrix.
- `geno`, `kinship`: passed to `scan_perms` unchanged.

# Keywords
- `ntraits`: number of leading columns of `pheno` to process.
- `nperms`: forwarded to `scan_perms` as its `nperms` keyword.
- `original`: forwarded to `scan_perms`; the output is sized for `nperms + 1` rows when
  `true` and for `nperms` rows when `false`.

# Returns
A `Matrix{Float64}` with one column per processed trait.

# Throws
- `BoundsError` if `ntraits` exceeds the number of columns of `pheno`; this is checked
  before any scan is run.
"""
function getMaxLod_manyTraits(pheno::Array{Float64, 2}, geno::Array{Float64, 2}, kinship::Array{Float64, 2};
                              ntraits::Int64 = 1000, nperms::Int64 = 1024, original::Bool = true)

    # Fail before any scan is run if more traits are requested than `pheno` holds.
    checkbounds(pheno, :, 1:ntraits)

    nrows = original ? nperms + 1 : nperms
    maxLods_manyTraits = Matrix{Float64}(undef, nrows, ntraits)

    for k in 1:ntraits
        # A range index keeps the selected column as an n-by-1 matrix.
        pheno_k = pheno[:, k:k]

        # `original` must reach scan_perms so that the number of rows it returns agrees
        # with `nrows`; previously the flag only changed the allocation above.
        # NOTE(review): assumes scan_perms accepts an `original` keyword with this
        # meaning — confirm against src/scan.jl.
        results = scan_perms(pheno_k, geno, kinship; nperms = nperms, original = original)

        maxLods_manyTraits[:, k] = getMaxLod(results)
    end

    return maxLods_manyTraits
end


"""
    getMaxLod(results)

Return the maximum of each row of `results`.

# Returns
A one-column `Matrix{Float64}` with `size(results, 1)` rows; entry `i` is the largest
value found in row `i` of `results`.
"""
function getMaxLod(results::Array{Float64, 2})
    # Reduce along the second dimension with the built-in reduction; the result keeps
    # the reduced dimension, so it is an n-by-1 matrix.
    return maximum(results; dims = 2)
end

In [None]:
## Row maxima of the single-trait permutation-scan result:
maxLods = getMaxLod(results_nperms);

In [None]:
typeof(maxLods)

In [None]:
size(maxLods)

In [None]:
## 0.95 quantile of the row maxima (maxLods has a single column):
quantile(maxLods[:, 1], 0.95)

In [None]:
## Histogram of the row maxima:
histogram(maxLods)

In [None]:
typeof(pheno_y)

In [None]:
## Benchmarks. Every global input is interpolated with `$` so that the timings do
## not include the overhead of accessing non-constant globals.
@btime scan($pheno_y, $geno, $kinship);

In [None]:
@btime scan_perms($pheno_y, $geno, $kinship; nperms = 0); # using rowDivide! with 1./sqrt(wts)

In [None]:
@btime scan_perms($pheno_y, $geno, $kinship; nperms = 0); # using rowMultiply! with sqrt(wts)

In [None]:
@btime getMaxLod_manyTraits($pheno, $geno, $kinship; ntraits = 1, nperms = 1024);

In [None]:
@btime getMaxLod_manyTraits($pheno, $geno, $kinship; ntraits = 100, nperms = 1);

In [None]:
@btime getMaxLod_manyTraits($pheno, $geno, $kinship; ntraits = 1000, nperms = 1);

In [None]:
@btime getMaxLod_manyTraits($pheno, $geno, $kinship; ntraits = 100, nperms = 10);

In [None]:
@btime getMaxLod_manyTraits($pheno, $geno, $kinship; ntraits = 1000, nperms = 10);

In [None]:
@btime getMaxLod_manyTraits($pheno, $geno, $kinship; ntraits = 100, nperms = 20);

In [None]:
@btime getMaxLod_manyTraits($pheno, $geno, $kinship; ntraits = 1000, nperms = 20);

In [None]:
@btime getMaxLod_manyTraits($pheno, $geno, $kinship; ntraits = 100, nperms = 100);

In [None]:
b = @benchmark getMaxLod_manyTraits($pheno, $geno, $kinship; ntraits = 1000, nperms = 100);

In [None]:
## Row maxima for the first 1000 traits, requesting 100 permutations per trait:
maxLods_1000Traits = getMaxLod_manyTraits(pheno, geno, kinship; ntraits = 1000, nperms = 100);

In [None]:
size(maxLods_1000Traits)

In [None]:
maxLods_1000Traits;

In [None]:
## 0.95 quantile of each column (one column per trait), kept as a 1-by-ntraits matrix:
quantile_1000Traits = permutedims([quantile(col, 0.95) for col in eachcol(maxLods_1000Traits)]);

In [None]:
histogram(vec(quantile_1000Traits), bins = 100)

In [None]:
findmax(quantile_1000Traits)