Comparing the optimization time and solution accuracy of the REBayes, EM and mixSQP algorithms.

In [1]:
using RCall # use R in Julia and thus in this jupyter notebook
using CSV # save/load txt files
using LowRankApprox # randomized low-rank approximation



In [2]:
# Read the small sample matrix (5000x20 according to the file name) from a
# space-delimited, header-less text file and convert it to a dense Float64 matrix.
L1 = Matrix{Float64}(
    CSV.read("../data/sample5000x20.txt", nullable = false, header = false, delim = ' ')
);

In [3]:
# Load the project source files for the solvers compared in this notebook,
# one after another in the order listed.
for src in ("../code/julia/mixSQP_time.jl",
            "../code/julia/mixEM.jl",
            "../code/julia/REBayes.jl")
    include(src)
end

In [4]:
# Run mixSQP_time on the small data set with every option spelled out, keeping
# only the first value it returns.
# The keyword arguments (eps, tol, pqrtol, sptol, lowrank) are defined by
# mixSQP_time itself, which is not shown in this notebook. The names suggest
# tolerances for the solver and the kind of low-rank factorisation to use --
# TODO confirm against the function's source.
x = mixSQP_time(
    L1;
    eps = 1e-8,
    tol = 1e-8,
    pqrtol = 1e-10,
    sptol = 1e-3,
    lowrank = "svd"
)[1];

In [20]:
# Time comparison between mixEM, REBayes and mixSQP on the small data set.
#
# `@elapsed` returns the number of seconds the wrapped expression took to run;
# the assignment inside it still binds the solution in the enclosing scope.
# It is used instead of tic()/toq(), which are deprecated in Julia 0.7 and
# removed in 1.0, while `@elapsed` is available in 0.6 and later.

## mixEM
t_em = @elapsed x_em = mixEM(L1)[1];

## REBayes: the second value it returns is used as its run time below.
x_rebayes, t_rebayes = REBayes(L1);

## mixSQP
t_mixsqp = @elapsed x_mixsqp = mixSQP_time(L1)[1];

# mixSQP outperforms on this small dataset 5000x20
["mixEM" "REbayes" "mixSQP"; t_em t_rebayes t_mixsqp]

2×3 Array{Any,2}:
   "mixEM"   "REbayes"   "mixSQP"
 16.998     0.091       0.0408473

In [23]:
# Objective value reached by each solver (mixEM, REBayes, mixSQP), reported as
# the gap to the smallest of the three. Each objective is evaluated once and
# reused, rather than being recomputed for the minimum and again for the
# difference.
fvals = [eval_f(L1, x_em), eval_f(L1, x_rebayes), eval_f(L1, x_mixsqp)];
minf = minimum(fvals);
# Broadcasted subtraction: vector-minus-scalar with plain `-` is an error on
# Julia >= 1.0, whereas `.-` works on 0.6 and later.
fvals .- minf

3-element Array{Float64,1}:
 1.36085e-5
 2.4869e-14
 0.0       

In [6]:
# Solutions from the three solvers side by side, one column per solver
# (mixEM, REBayes, mixSQP). EM has still not converged when it stops at maxiter.
hcat(x_em, x_rebayes, x_mixsqp)

20×3 Array{Float64,2}:
 0.153501      0.469905  0.469905
 0.118181      0.0       0.0     
 0.0953305     0.0       0.0     
 0.0644788     0.0       0.0     
 0.0316927     0.0       0.0     
 0.0087753     0.0       0.0     
 0.000885194   0.0       0.0     
 1.66476e-5    0.0       0.0     
 2.77142e-8    0.0       0.0     
 4.31917e-12   0.0       0.0     
 1.64767e-15   0.0       0.0     
 1.14012e-13   0.0       0.0     
 0.300091      0.305865  0.305864
 0.227048      0.22423   0.22423 
 4.75833e-106  0.0       0.0     
 2.47033e-323  0.0       0.0     
 4.94066e-324  0.0       0.0     
 4.94066e-324  0.0       0.0     
 0.0           0.0       0.0     
 0.0           0.0       0.0     

In [7]:
# How closely do the REBayes and mixSQP solutions agree on the small data set?
println("l1 norm difference between solutions: ", norm(x_mixsqp - x_rebayes, 1))
println(
    "relative difference between objective values: ",
    rel_error(L1, x_mixsqp, x_rebayes)
);

l1 norm difference between solutions: 4.0214898031076807e-7
relative difference between objective values: 5.2735593669694936e-14


In [8]:
# Let's try a large dataset, 100000x100.
# Time comparison between mixSQP and REBayes.
# Make the large data file first if it doesn't exist:
# include("../code/julia/makedata.jl")

L = Array{Float64,2}(CSV.read("../data/sample100000x100.txt", nullable = false, header = false, delim = ' '));

# Copy L into the R session and time REBayes::KWDual there; element 3 of
# system.time() is the elapsed time. Weights below 1e-3 are set to zero and
# the remainder renormalised to sum to one before being copied back to Julia.
@rput L;
R"t_rebayes = system.time(res <- REBayes::KWDual(L, rep(1,dim(L)[2]), rep(1,dim(L)[1])/dim(L)[1]))[3];
res$f[res$f < 1e-3] = 0
x_rebayes = res$f / sum(res$f)"
@rget x_rebayes;
@rget t_rebayes;

# mixSQP outperforms on this large dataset 100000x100
# `@elapsed` replaces tic()/toq(), which are deprecated in Julia 0.7 and
# removed in 1.0; it returns the seconds taken while the assignment inside it
# still binds x_mixsqp.
t_mixsqp = @elapsed x_mixsqp = mixSQP_time(L)[1];
["mixSQP" "REbayes"; t_mixsqp t_rebayes]

2×2 Array{Any,2}:
  "mixSQP"    "REbayes"
 0.28861    12.976     

In [9]:
# The two solutions almost coincide on the large data set as well.
println("l1 norm difference between solutions: ", norm(x_mixsqp - x_rebayes, 1))
println(
    "relative difference between objective values: ",
    rel_error(L, x_mixsqp, x_rebayes)
);

l1 norm difference between solutions: 2.6047920905769573e-6
relative difference between objective values: 0.0


In [10]:
# Let's run adaptive shrinkage (ash) for the comparison.
include("../code/julia/ash.jl")

# Seeded simulated input: 50000 standard-normal draws followed by 50000 draws
# scaled by 3, with s a vector of ones of the same length.
srand(1);
z = vcat(randn(50000), 3 * randn(50000));
s = ones(length(z));
out = ash(z, s, mult = 1.04);

# The solution is sparse.
x = sparse(out[4]);
print(x)

# Computation time, labelled by stage.
["likelihood" "lowrank" "mixSQP" "posterior"; out[5]']

  [1  ]  =  0.496888
  [87 ]  =  0.0755944
  [88 ]  =  0.427518

2×4 Array{Any,2}:
  "likelihood"   "lowrank"   "mixSQP"   "posterior"
 1.35093        0.351193    0.296403   0.406882    

In [11]:
# :::Warning:::
# Perhaps you don't want to run this: it's much slower.
# Fit the same z and s with the current "ashr" package in R, for comparison
# with the Julia result above.
# Rebind L to out[3] from the Julia ash run; L is what the next cell passes to
# rel_error.
L = out[3];
# Copy z and s into the R session.
@rput z; @rput s;
# Fit in R and keep element 3 of system.time() (elapsed time) as t_ash.
R"t_ash = system.time(res <- ashr::ash(z,s, mixcompdist = 'normal', prior = 'uniform', gridmult = 1.04) )[3]"
@rget t_ash
# Extract res$fitted_g$pi from the R fit and copy it back to Julia as x_ash.
R"x_ash = res$fitted_g$pi"
@rget x_ash
# Display the ashr run time.
t_ash

In [12]:
# The Julia ash solution and the R ashr solution almost coincide.
println("l1 norm difference between solutions: ", norm(x - x_ash, 1))
println(
    "relative difference between objective values: ",
    rel_error(L, x, x_ash)
);

l1 norm difference between solutions: 4.191711227991043e-6
relative difference between objective values: 2.000435372906395e-10


## Session information

This section gives information about the computing environment used to generate the results contained in this
manuscript, including the versions of R, Julia, and the R and Julia packages used.

In [13]:
# R library/system information: evaluate R's sessionInfo() through RCall and
# display the result as this cell's output.
R"sessionInfo()"

RCall.RObject{RCall.VecSxp}
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.3

Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] REBayes_1.3   Matrix_1.2-12

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.15      lattice_0.20-35   codetools_0.2-15  Rmosek_8.0.69    
 [5] foreach_1.4.4     assertthat_0.2.0  truncnorm_1.0-8   MASS_7.3-48      
 [9] grid_3.4.3        pscl_1.5.2        doParallel_1.0.11 iterators_1.0.9  
[13] parallel_3.4.3    compiler_3.4.3    ashr_2.2-6        SQUAREM_2017.10-1


In [14]:
# Print the installed version of each Julia package loaded at the top of this
# notebook.
for pkg in ("RCall", "CSV", "LowRankApprox")
    Pkg.status(pkg)
end

 - RCall                         0.8.1
 - CSV                           0.1.5
 - LowRankApprox                 0.1.0


In [15]:
# Print the Julia version and platform details.
versioninfo()

Julia Version 0.6.2
Commit d386e40c17 (2017-12-13 18:08 UTC)
Platform Info:
  OS: macOS (x86_64-apple-darwin14.5.0)
  CPU: Intel(R) Core(TM) i7-7567U CPU @ 3.50GHz
  WORD_SIZE: 64
  BLAS: libopenblas (USE64BITINT DYNAMIC_ARCH NO_AFFINITY Prescott)
  LAPACK: libopenblas64_
  LIBM: libopenlibm
  LLVM: libLLVM-3.9.1 (ORCJIT, broadwell)
