In [58]:
# RCall lets this notebook run R code from Julia (used in later cells
# through R"..." strings and the @rput / @rget macros).
using RCall
# Print the installed RCall version so the run can be reproduced.
Pkg.status("RCall")

 - RCall                         0.8.1


In [59]:
# DataFrames provides readtable, used in later cells to load the
# space-separated text data files.
using DataFrames
# Print the installed DataFrames version so the run can be reproduced.
Pkg.status("DataFrames")

 - DataFrames                    0.10.1


In [60]:
# Read the headerless, space-separated 5000x20 sample file and convert the
# resulting table into a dense Float64 matrix.
data = Array{Float64,2}(
    readtable("data/sample5000x20.txt", header = false, separator = ' ')
);

In [61]:
# Load the project source files used by the cells below.
# NOTE(review): rel_error is called later but not defined in this notebook,
# so it presumably comes from one of these files — confirm.
include("code/mixSQP_time.jl") # presumably defines mixSQP_time
include("code/mixEM.jl");      # presumably defines mixEM
include("code/REBayes.jl");    # presumably defines the REBayes wrapper

In [62]:
# Solve on the raw matrix with explicitly chosen solver settings.
x1 = mixSQP_time(
    data,
    eps = 1e-8,
    tol = 1e-8,
    ptol = 1e-10,
    sptol = 1e-3,
    lowrank = "svd"
)[1];

# Rescale every row by its largest entry (maximum taken along dimension 2),
# then solve again with the default settings.
L = data ./ maximum(data, 2);
x2 = mixSQP_time(L)[1];

# l1 distance between the two solutions: row-wise renormalization should
# yield the same solution.
norm(x2 - x1, 1)

In [63]:
# Time comparison between mixEM, mixSQP and REBayes on the 5000x20 data.
# Timings use @elapsed rather than tic()/toq(), which are deprecated in
# Julia 0.7 and removed in 1.0; the measured quantity (wall-clock seconds)
# is the same.
# NOTE(review): mixSQP_time has already been called in an earlier cell, so its
# timing here excludes JIT compilation, whereas this may be the first call of
# mixEM — keep that in mind when reading the numbers.

# mixEM: keep the first returned element (the solution) and the elapsed time.
t_em = @elapsed begin
    x_em = mixEM(L)[1]
end;

# REBayes: the wrapper returns both the solution and its own timing.
x_rebayes,t_rebayes = REBayes(L);

# mixSQP: keep the first returned element (the solution) and the elapsed time.
t_mixsqp = @elapsed begin
    x_mixsqp = mixSQP_time(L)[1]
end;

# mixSQP outperforms the other two on this small 5000x20 dataset
["mixEM" "REbayes" "mixSQP"; t_em t_rebayes t_mixsqp]

2×3 Array{Any,2}:
   "mixEM"   "REbayes"   "mixSQP"
 14.7347    0.08        0.0353605

In [64]:
# Solutions side by side, one column per method (mixEM, REBayes, mixSQP).
# EM runs to maxiter without converging.
hcat(x_em, x_rebayes, x_mixsqp)

20×3 Array{Float64,2}:
 0.127199      0.0         0.0       
 0.127199      0.0         0.0       
 0.127645      0.0         0.0       
 0.128086      0.0         0.0       
 0.127556      0.354333    0.354355  
 0.123721      0.627736    0.627714  
 0.11013       0.0         0.0       
 0.0770192     0.0         0.0       
 0.0304384     0.0         0.0       
 0.00458465    0.0         0.0       
 0.000387899   0.0         0.0       
 0.000140873   0.0         0.0       
 0.000683394   0.00330509  0.00330597
 0.0073103     0.00675424  0.00675372
 4.2626e-7     0.0         0.0       
 1.81378e-27   0.0         0.0       
 1.04048e-38   0.0         0.0       
 0.00521181    0.00514342  0.00514348
 0.0026887     0.00272876  0.00272873
 1.17097e-188  0.0         0.0       

In [65]:
# Compare the REBayes and mixSQP solutions on the small dataset.

# l1 distance between the two solution vectors
print("l1 norm difference between solutions: ")
println(norm(x_mixsqp - x_rebayes, 1))

# relative difference between the objective values at the two solutions
print("relative difference between objective values: ")
println(rel_error(L,x_mixsqp,x_rebayes));

l1 norm difference between solutions: 4.5283897708680524e-5
relative difference between objective values: 1.7352785874891197e-13


In [66]:
# Repeat the comparison on a large dataset (100000x100): mixSQP vs. REBayes.
# If the data file does not exist yet, create it first with
# include("code/makedata.jl")

# Read the headerless, space-separated file into a dense Float64 matrix.
L = Array{Float64,2}(
    readtable("data/sample100000x100.txt", header = false, separator = ' ')
);

# Copy L into the R session, time REBayes::KWDual there with system.time,
# zero out weights below 1e-3 and renormalize so the weights sum to one.
@rput L;
R"t_rebayes = system.time(res <- REBayes::KWDual(L, rep(1,dim(L)[2]), rep(1,dim(L)[1])/dim(L)[1]))[3];
res$f[res$f < 1e-3] = 0
x_rebayes = res$f / sum(res$f)"
# Bring the R solution and timing back into Julia.
@rget x_rebayes;
@rget t_rebayes;

# mixSQP outperforms REBayes on this large dataset (100000x100).
# @elapsed replaces tic()/toq(), which are deprecated in Julia 0.7 and
# removed in 1.0; it measures the same wall-clock seconds.
t_mixsqp = @elapsed begin
    x_mixsqp = mixSQP_time(L)[1]
end;
["mixSQP" "REbayes"; t_mixsqp t_rebayes]

2×2 Array{Any,2}:
  "mixSQP"    "REbayes"
 0.275421   13.981     

In [67]:
# The two solutions almost coincide.

# l1 distance between the mixSQP and REBayes solution vectors
print("l1 norm difference between solutions: ")
println(norm(x_mixsqp - x_rebayes, 1))

# relative difference between the objective values at the two solutions
print("relative difference between objective values: ")
println(rel_error(L,x_mixsqp,x_rebayes));

l1 norm difference between solutions: 2.696556654122051e-6
relative difference between objective values: 4.440892098500626e-16


In [68]:
# Run Adaptive Shrinkage (ash) for the comparison.
include("code/ash.jl")

# Fixed seed so the simulated data are reproducible.
srand(1);

# 100000 observations: 50000 standard normal draws followed by 50000 normal
# draws scaled by 3; every observation gets standard error 1.
z = vcat(randn(50000), 3*randn(50000));
s = ones(100000);

out = ash(z, s, mult = 1.04);

# The solution is sparse: store it as a sparse vector and print its nonzeros.
x = sparse(out[4]);
print(x)

# Computation time of each stage, labelled in the first row.
["likelihood" "lowrank" "mixSQP" "posterior"; out[5]']

  [1  ]  =  0.496888
  [87 ]  =  0.0755944
  [88 ]  =  0.427518

2×4 Array{Any,2}:
  "likelihood"   "lowrank"   "mixSQP"   "posterior"
 1.43529        0.229641    0.287963   0.115075    

In [69]:
# :::Warning:::
# Perhaps you don't want to run this: it's much slower.
# Fit the same model with the current "ashr" package in R for comparison.

# out[3] from the ash run above is passed to rel_error in the next cell.
L = out[3];

# Copy the observations and their standard errors into the R session.
# The standard errors are `s`, the vector passed to ash above; the previous
# code referenced `s2`, which is never defined in this notebook and only
# worked through leftover kernel state.
@rput z; @rput s;

# Time ashr::ash (third element of system.time) and keep the fit in `res`.
R"t_ash = system.time(res <- ashr::ash(z,s, mixcompdist = 'normal', prior = 'uniform', gridmult = 1.04) )[3]"
@rget t_ash

# Extract the fitted mixture proportions and bring them back into Julia.
R"x_ash = res$fitted_g$pi"
@rget x_ash

# Display the ashr running time.
t_ash

In [70]:
# The two solutions almost coincide.

# l1 distance between the Julia ash solution and the ashr solution
print("l1 norm difference between solutions: ")
println(norm(x - x_ash, 1))

# relative difference between the objective values at the two solutions
print("relative difference between objective values: ")
println(rel_error(L,x,x_ash));

l1 norm difference between solutions: 4.191712685658363e-6
relative difference between objective values: 2.0004398137984936e-10


In [71]:
# R library/system information: show the R version, platform and the R
# packages attached or loaded in the session used by this notebook.
R"sessionInfo()"

RCall.RObject{RCall.VecSxp}
R version 3.4.3 (2017-11-30)
Platform: x86_64-apple-darwin15.6.0 (64-bit)
Running under: macOS High Sierra 10.13.3

Matrix products: default
BLAS: /System/Library/Frameworks/Accelerate.framework/Versions/A/Frameworks/vecLib.framework/Versions/A/libBLAS.dylib
LAPACK: /Library/Frameworks/R.framework/Versions/3.4/Resources/lib/libRlapack.dylib

locale:
[1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
[1] REBayes_1.2   Matrix_1.2-12

loaded via a namespace (and not attached):
 [1] Rcpp_0.12.15      lattice_0.20-35   codetools_0.2-15  Rmosek_8.0.69    
 [5] foreach_1.4.4     assertthat_0.2.0  truncnorm_1.0-7   MASS_7.3-47      
 [9] grid_3.4.3        pscl_1.5.2        doParallel_1.0.11 iterators_1.0.9  
[13] parallel_3.4.3    compiler_3.4.3    ashr_2.2-4        SQUAREM_2017.10-1
