In [2]:
using RCall # use R in Julia and thus in this jupyter notebook
using CSV # save/load txt files
using LowRankApprox # randomized low-rank approximation

In [3]:
L1 = Array{Float64,2}(CSV.read("../data/sample5000x20.txt", nullable = false, header = false, delim = ' '));

In [4]:
include("../code/julia/mixSQP_time.jl")
include("../code/julia/mixEM.jl");
include("../code/julia/REBayes.jl");

In [5]:
# mixSQP_time
# L1      :
# eps     :
# tol     :
# pqrtol    :
# sptol   :
# lowrank :
x = mixSQP_time(L1, eps=1e-8, tol=1e-8, pqrtol = 1e-10, sptol=1e-3, lowrank = "svd")[1];

In [6]:
# time comparison between mixEM, mixSQP and REBayes

## mixEM;
tic(); x_em = mixEM(L1)[1]; t_em = toq();

x_rebayes,t_rebayes = REBayes(L1);

tic(); x_mixsqp = mixSQP_time(L1)[1]; t_mixsqp = toq();

# mixSQP ourperforms on this small dataset 5000x20
["mixEM" "REbayes" "mixSQP"; t_em t_rebayes t_mixsqp]

Loading required package: Matrix[39m


2×3 Array{Any,2}:
   "mixEM"   "REbayes"   "mixSQP"
 19.9864    0.189       0.0473131

In [7]:
# EM does not converge until maxiter
[x_em x_rebayes x_mixsqp]

20×3 Array{Float64,2}:
 0.153501      0.469905  0.469905
 0.118181      0.0       0.0     
 0.0953305     0.0       0.0     
 0.0644788     0.0       0.0     
 0.0316927     0.0       0.0     
 0.0087753     0.0       0.0     
 0.000885194   0.0       0.0     
 1.66476e-5    0.0       0.0     
 2.77142e-8    0.0       0.0     
 4.31917e-12   0.0       0.0     
 1.64767e-15   0.0       0.0     
 1.14012e-13   0.0       0.0     
 0.300091      0.305865  0.305864
 0.227048      0.22423   0.22423 
 4.75833e-106  0.0       0.0     
 2.47033e-323  0.0       0.0     
 4.94066e-324  0.0       0.0     
 4.94066e-324  0.0       0.0     
 0.0           0.0       0.0     
 0.0           0.0       0.0     

In [8]:
# compare REBayes and mixsqp
print("l1 norm difference between solutions: "); println(norm(x_mixsqp - x_rebayes, 1))
print("relative difference between objective values: "); println(rel_error(L1,x_mixsqp,x_rebayes));

l1 norm difference between solutions: 4.0214898922030784e-7
relative difference between objective values: 5.3290705182007514e-14


In [9]:
# Let's try large dataset 100000x100
# time comparison between mixSQP and REBayes
# make a large data if it doesn't exist
# include("../code/julia/makedata.jl")

L = Array{Float64,2}(CSV.read("../data/sample100000x100.txt", nullable = false, header = false, delim = ' '));
@rput L;
R"t_rebayes = system.time(res <- REBayes::KWDual(L, rep(1,dim(L)[2]), rep(1,dim(L)[1])/dim(L)[1]))[3];
res$f[res$f < 1e-3] = 0
x_rebayes = res$f / sum(res$f)"
@rget x_rebayes;
@rget t_rebayes;

# mixSQP ourperforms on this large dataset 100000x100
tic(); x_mixsqp = mixSQP_time(L)[1]; t_mixsqp = toq();
["mixSQP" "REbayes"; t_mixsqp t_rebayes]

2×2 Array{Any,2}:
  "mixSQP"    "REbayes"
 0.388323   12.228     

In [10]:
# solution almost conincides
print("l1 norm difference between solutions: "); println(norm(x_mixsqp - x_rebayes, 1))
print("relative difference between objective values: "); println(rel_error(L,x_mixsqp,x_rebayes));

l1 norm difference between solutions: 2.6897182103940986e-6
relative difference between objective values: 0.0


In [11]:
# let's run Adaptive Shinkage for the comparison.
include("../code/julia/ash.jl")
srand(1);
z = [randn(50000);3*randn(50000)];
s = ones(100000);
out = ash(z,s, mult = 1.04);

# solution is sparse
x = sparse(out[4]); print(x)

# computation time
["likelihood" "lowrank" "mixSQP" "posterior"; out[5]']

  [1  ]  =  0.496888
  [87 ]  =  0.0755944
  [88 ]  =  0.427518

2×4 Array{Any,2}:
  "likelihood"   "lowrank"   "mixSQP"   "posterior"
 1.42912        0.229719    0.228807   0.477003    

In [13]:
# PETER: I get an error that s2 is not defined.
#
# :::Warning:::
# Perhaps you don't want to run this: it's much slower.
# current "ashr" package in R
L = out[3];
@rput z; @rput s2;
R"t_ash = system.time(res <- ashr::ash(z,s2, mixcompdist = 'normal', prior = 'uniform', gridmult = 1.04) )[3]"
@rget t_ash
R"x_ash = res$fitted_g$pi"
@rget x_ash
t_ash

LoadError: [91mUndefVarError: s2 not defined[39m

In [None]:
# solution almost conincides
print("l1 norm difference between solutions: "); println(norm(x - x_ash, 1))
print("relative difference between objective values: "); println(rel_error(L,x,x_ash));

## Session information

The section gives information about the computing environment used to generate the results contained in this
manuscript, including the version of Julia, Python and the Julia packages. 

In [None]:
# R library/system information
R"sessionInfo()"

In [None]:
Pkg.status("RCall")
Pkg.status("CSV")
Pkg.status("LowRankApprox")

In [None]:
versioninfo()