In [1]:
using Pkg
Pkg.activate("/media/mat/HDD/AdaptiveTransportMap/")

[32m[1m Activating[22m[39m environment at `/media/mat/HDD/AdaptiveTransportMap/Project.toml`


In [2]:
using Revise
using AdaptiveTransportMap
using LinearAlgebra
import LinearAlgebra: ldiv!, dot
using Test
using ForwardDiff
using SpecialFunctions
using BenchmarkTools
using TransportMap
using QuadGK
using AdaptiveTransportMap: derivative, vander, transform!, evaluate, ncoeff
using DiffResults
using Distributions
using Random
using LoopVectorization
using Optim
using NLsolve
using Polynomials
using Optim: InverseDiagonal#, ldiv!, dot

┌ Info: Precompiling AdaptiveTransportMap [bdf749b0-1400-4207-80d3-e689c0e3f03d]
└ @ Base loading.jl:1260


### Type stability

In [12]:
B = ProHermite(10; scaled = true)
x = randn(1000);

  30.929 ns (2 allocations: 32 bytes)


0.1425484378703077

In [22]:
function timing()
    @btime vander($B, $0, $x)
    @btime $B($7.0)
    @btime exp(-7.0^2/2.0)
end

timing (generic function with 1 method)

In [23]:
timing()

  34.634 μs (167 allocations: 89.45 KiB)
  31.072 ns (2 allocations: 32 bytes)
  1.181 ns (0 allocations: 0 bytes)


2.289734845645553e-11

In [13]:
@btime vander($B, $0, $x);

  32.879 μs (167 allocations: 89.45 KiB)


1000×11 Array{Float64,2}:
 0.553428  -0.402375   -0.184469    0.405972    …  -0.253378    0.2214
 0.63156   -0.0121385  -0.446416    0.0148647      -0.0190327  -0.312725
 0.468272   0.512313    0.065212   -0.377111       -0.0691254   0.292051
 0.538869   0.429493   -0.138983   -0.414635        0.205378    0.265815
 0.61891   -0.176473   -0.402055    0.210277       -0.247556   -0.188874
 0.477694  -0.504926    0.0396091   0.388098    …   0.0331524   0.303991
 0.628303  -0.0911663  -0.434924    0.110872       -0.139027   -0.279335
 0.157936  -0.371884    0.507506   -0.386292       -0.239625   -0.123452
 0.62566   -0.121826   -0.425635    0.14732        -0.181561   -0.253014
 0.585904  -0.32119    -0.289794    0.35397        -0.319655    0.0637283
 0.540661   0.426394   -0.144521   -0.413954    …   0.211663    0.261168
 0.306568  -0.521291    0.410008    0.0231148       0.292783   -0.214368
 0.308224  -0.52215     0.407526    0.0277468       0.295454   -0.209605
 ⋮                        

In [22]:
ImmutablePolynomial{Float64}

ImmutablePolynomial{Float64,N} where N

In [24]:
typeof(ImmutablePolynomial([1.0]))

ImmutablePolynomial{Float64,1}

In [4]:
@code_warntype B(1.0)

Variables
  P[36m::PhyHermite{10}[39m
  x[36m::Float64[39m

Body[36m::Float64[39m
[90m1 ─[39m %1  = Base.getproperty(P, :Poly)[36m::PhyPolyHermite{10}[39m
[90m│  [39m %2  = Base.getproperty(%1, :P)[91m[1m::ImmutablePolynomial{Float64,N} where N[22m[39m
[90m│  [39m %3  = (%2)(x)[36m::Float64[39m
[90m│  [39m %4  = Core.apply_type(Base.Val, 2)[36m::Core.Compiler.Const(Val{2}, false)[39m
[90m│  [39m %5  = (%4)()[36m::Core.Compiler.Const(Val{2}(), false)[39m
[90m│  [39m %6  = Base.literal_pow(AdaptiveTransportMap.:^, x, %5)[36m::Float64[39m
[90m│  [39m %7  = -%6[36m::Float64[39m
[90m│  [39m %8  = (%7 / 2)[36m::Float64[39m
[90m│  [39m %9  = AdaptiveTransportMap.exp(%8)[36m::Float64[39m
[90m│  [39m %10 = (%3 * %9)[36m::Float64[39m
[90m└──[39m       return %10


### Greedy fit

In [4]:
Nx = 10

m = 20
Ne = 100
X = randn(Nx, Ne).^2 + 0.1*randn(Nx, Ne)

10×100 Array{Float64,2}:
  3.68039     0.946875     0.693619   …  0.855523    0.304843   0.898246
  1.07442     0.112446     0.921913      4.57749     0.107159   2.62052
  0.541196    0.334715    -0.0658506     0.101096    0.17725    0.0605184
  0.0386828   2.84174      4.04136       0.0465724   0.484137   0.187518
  3.1959      0.651701     0.194581      4.07973    -0.0578114  0.256688
  0.0727446   0.53932      2.85015    …  2.61873     0.604214   1.27297
 -0.0368003   0.180983     2.74352       1.13634     0.162536   0.449194
  3.26952     0.739949     1.66152       3.58785    -0.0402513  0.496681
  0.14564     2.72651      6.17343       1.88102     0.712513   2.11774
  1.35496    -0.00990635   0.1061        1.5473      0.718061   1.15609

In [5]:
@code_warntype Hk_old = HermiteMapk(m, Nx; α = 1e-6)

ErrorException: expression is not a function call, or is too complex for @code_warntype to analyze; break it down to simpler parts if possible

In [25]:
L = LinearTransform(X; diag = true)
transform!(L, X)

10×100 Array{Float64,2}:
 -0.408456  -0.330729   0.0267807  …   0.445658  -0.307129  -0.550616
  0.398451  -0.419435  -0.129437       0.138649  -0.632225  -0.377752
  0.414137  -0.656114   0.155972       0.267643  -0.580941  -0.438812
 -0.414127  -0.524096  -0.41446       -0.161132   0.159428  -0.789193
  1.04546    0.197856  -0.109473      -0.641067  -0.87027    2.41466
 -0.759387   0.269095   1.80427    …  -0.475912  -0.613684   1.90154
 -0.111056  -0.573471  -0.81206        2.61869   -0.621644  -0.600618
 -0.808418  -0.478858   0.0477805      0.314501  -0.351539   0.227846
 -0.71285   -0.60675   -0.235489      -0.695719  -0.645286  -0.604663
  0.356629   1.02711   -0.751437       0.772187  -0.218078  -0.180552

In [27]:
X_train = X[:,1:80]
X_valid = X[:,81:100]

10×20 Array{Float64,2}:
 -0.72746    -0.237852    1.43312    …   0.445658  -0.307129  -0.550616
 -0.386074    0.0791423   0.723439       0.138649  -0.632225  -0.377752
 -0.51259    -0.55344     0.0360886      0.267643  -0.580941  -0.438812
 -0.459435   -0.715676   -0.128447      -0.161132   0.159428  -0.789193
 -0.0233207  -0.58865    -0.131694      -0.641067  -0.87027    2.41466
  0.423174    0.298617   -0.532006   …  -0.475912  -0.613684   1.90154
 -0.350061    0.0893499  -0.65779        2.61869   -0.621644  -0.600618
  0.351178   -0.419078   -0.607032       0.314501  -0.351539   0.227846
  0.027123   -0.695594   -0.14186       -0.695719  -0.645286  -0.604663
  0.422556   -0.505413   -0.501859       0.772187  -0.218078  -0.180552

In [36]:
@btime greedyfit($m, $Nx, $X_train, $X_valid, $10; verbose = false);

  23.745 ms (139564 allocations: 38.88 MiB)


In [53]:
@code_warntype greedyfit(m, Nx, X_train, X_valid, 15; verbose = false);

Variables
  #unused#[36m::Core.Compiler.Const(AdaptiveTransportMap.var"#greedyfit##kw"(), false)[39m
  @_2[36m::NamedTuple{(:verbose,),Tuple{Bool}}[39m
  @_3[36m::Core.Compiler.Const(AdaptiveTransportMap.greedyfit, false)[39m
  m[36m::Int64[39m
  k[36m::Int64[39m
  X[36m::Array{Float64,2}[39m
  Xvalid[36m::Array{Float64,2}[39m
  maxterms[36m::Int64[39m
  maxpatience[36m::Int64[39m
  verbose[36m::Bool[39m
  @_11[36m::Int64[39m
  @_12[36m::Bool[39m

Body[91m[1m::Tuple{HermiteMapk{_A,1,_B} where _B where _A,Array{Float64,1},Array{Float64,1}}[22m[39m
[90m1 ─[39m %1  = Base.haskey(@_2, :maxpatience)[36m::Core.Compiler.Const(false, false)[39m
[90m└──[39m       goto #3 if not %1
[90m2 ─[39m       Core.Compiler.Const(:(Base.getindex(@_2, :maxpatience)), false)
[90m│  [39m       Core.Compiler.Const(:(%3 isa AdaptiveTransportMap.Int64), false)
[90m│  [39m       Core.Compiler.Const(:(%4), false)
[90m│  [39m       Core.Compiler.Const(:(goto %9), false)
[9

In [151]:
@code_warntype greedyfit($m, $Nx, $X_train, $X_valid, $15; verbose = false);

ErrorException: syntax: "$" expression outside quote

In [99]:
Hk_old = HermiteMapk(m, Nx; α = 1e-6);

@time S = Storage(Hk_old.I.f, X);

Hk_new, train_error, valid_error = greedyfit(m, Nx, X_train, X_valid, 10; verbose = true);

  0.000038 seconds (42 allocations: 120.516 KiB)
1 terms - Training error: 1.4443504670380543, Validation error: 1.3147907978711455
2 terms - Training error: 1.013555754394048, Validation error: 1.0148284980423214
3 terms - Training error: 0.9740035731615574, Validation error: 0.9536709077333043
4 terms - Training error: 0.968194963388136, Validation error: 0.9269258934472301
5 terms - Training error: 0.8660654654774056, Validation error: 0.8538651976765612
6 terms - Training error: 0.8468938234481649, Validation error: 0.8374329264723278
7 terms - Training error: 0.8118015219715872, Validation error: 0.8007373200002506
8 terms - Training error: 0.808988833791454, Validation error: 0.8016700608672841
9 terms - Training error: 0.8055509920369043, Validation error: 0.8064967607768131
10 terms - Training error: 0.7857644650326624, Validation error: 0.8090534466320567


In [106]:
Hk_old = HermiteMapk(m, Nx; α = 1e-6);

@time S = Storage(Hk_old.I.f, X);

Hk_new, train_error, valid_error = greedyfit(m, Nx, X_train, X_valid, 10; verbose = true);

  0.000057 seconds (42 allocations: 120.516 KiB)
1 terms - Training error: 1.41428818328749, Validation error: 1.435039932873401
2 terms - Training error: 1.011647231546337, Validation error: 1.0765340323252046
3 terms - Training error: 0.954187562314565, Validation error: 1.009496573370243
4 terms - Training error: 0.9532824496069529, Validation error: 1.007776307368724
5 terms - Training error: 0.841380513349966, Validation error: 0.9041126563257862
6 terms - Training error: 0.8299467025145106, Validation error: 0.9037778557535633
7 terms - Training error: 0.8018842783083129, Validation error: 0.8750517929963674
8 terms - Training error: 0.801758584142784, Validation error: 0.8767880843281514
9 terms - Training error: 0.8015539210804201, Validation error: 0.8774101595696208
10 terms - Training error: 0.7995583928098936, Validation error: 0.8823916723449652


In [83]:
Hk_test = deepcopy(Hk_new)
setcoeff!(Hk_test, zero(getcoeff(Hk_new)));

In [85]:
S_test = Storage(Hk_test.I.f, X_train)
coeff_test = getcoeff(Hk_test)

res = Optim.optimize(Optim.only_fg!(negative_log_likelihood!(S_test, Hk_test, X_train)), coeff_test, Optim.BFGS())
Optim.minimizer(res)

10-element Array{Float64,1}:
 -11.217886066730607
  11.50914577979369
  26.61111083798624
 -24.71937861667548
  34.997147393519235
 -17.03501613771925
  11.077321531464895
   0.034843907277600714
   0.0381866111200113
   0.1782552001411759

In [78]:
res = Optim.optimize(Optim.only_fg!(negative_log_likelihood!(S, Hk_old, ens.S)), coeff, Optim.BFGS())
coeffopt = Optim.minimizer(res)

1-element Array{Float64,1}:
 -1.144117388080984

### Hessian computation 

In [22]:
Nx = 4
m = 10
B = MultiBasis(CstProHermite(m; scaled =true), Nx)

Nψ = 14
# idx = rand(0:5,Nψ, Nx)
idx =Matrix([5  2  5  3;
             0  2  2  5;
             5  4  3  4;
             4  1  3  1;
             3  2  4  5;
             0  3  3  3;
             4  2  0  0;
             2  0  2  4;
             4  4  0  5;
             0  4  0  2;
             4  5  5  5;
             0  1  2  3;
             4  2  4  0;
             4  3  2  3]);


coeff = [  2.421476831418713;
          0.23398851530507098;
         -0.5285368375369742;
          0.8947095708709891;
         -0.688462862924509;
          0.23331743780756214;
          2.0098809355359006;
          1.1243983698817437;
         -0.18373487759949592;
          0.24787268168991677;
          0.9041136183543212;
          1.9776434399606355;
          1.1972246121155632;
          0.3847306368698849]

f = ExpandedFunction(B, idx, coeff)

fp = ParametricFunction(f)
R = IntegratedFunction(fp)
H = HermiteMapk(R; α = 1e-6);
Ne = 500

500

In [23]:
sum(idx;dims = 2)

14×1 Array{Int64,2}:
 15
  9
 16
  9
 14
  9
  6
  8
 13
  6
 19
  6
 10
 12

In [24]:
X = randn(Nx, Ne) .* randn(Nx, Ne) 
S = Storage(H.I.f, X; hess = false);

J = 0.0
dJ = zeros(Nψ)
d2J = zeros(Nψ, Nψ)

@time negative_log_likelihood!(J, dJ, coeff, S, H, X)

precond!

  0.002007 seconds (1.53 k allocations: 2.742 MiB)


precond! (generic function with 2 methods)

In [25]:
@btime res = Optim.optimize(Optim.only_fg!(negative_log_likelihood!($S, $H, $X)), $coeff, Optim.LBFGS())

  104.624 ms (130849 allocations: 233.12 MiB)


 * Status: success

 * Candidate solution
    Final objective value:     1.479295e+00

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 1.24e-05 ≰ 0.0e+00
    |x - x'|/|x'|          = 1.82e-06 ≰ 0.0e+00
    |f(x) - f(x')|         = 1.87e-13 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.26e-13 ≰ 0.0e+00
    |g(x)|                 = 2.84e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    28
    f(x) calls:    85
    ∇f(x) calls:   85


In [26]:
struct Preconditioner
    P::Symmetric{Float64}
    
    F::Cholesky{Float64, Matrix{Float64}}
end

function Preconditioner(P::Matrix{Float64})
    return Preconditioner(Symmetric(P), cholesky(Symmetric(P)))
end

ldiv!(x, P::Preconditioner, b) = copyto!(x, P.F \ b)
dot(A::Array, P::Preconditioner, B::Vector) = dot(A, P.P, B)

ErrorException: error in method definition: function LinearAlgebra.ldiv! must be explicitly imported to be extended

In [27]:

# res = Optim.optimize(Optim.only_fg!(negative_loglikelihood!(S, H, X)), coeff, 
#             Optim.LBFGS(P = Preconditioner(precond)))#, precondprep = (P, x) -> precond!(S, H, X)(P, x)))

In [28]:
@btime res = Optim.optimize(Optim.only_fg!(negative_log_likelihood!($S, $H, $X)), $coeff, Optim.LBFGS())#, precondprep = (P, x) -> Preconditioner(precond!($S, $H, $X)($P.P.data, x))))

  104.817 ms (130849 allocations: 233.12 MiB)


 * Status: success

 * Candidate solution
    Final objective value:     1.479295e+00

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 1.24e-05 ≰ 0.0e+00
    |x - x'|/|x'|          = 1.82e-06 ≰ 0.0e+00
    |f(x) - f(x')|         = 1.87e-13 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.26e-13 ≰ 0.0e+00
    |g(x)|                 = 2.84e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    28
    f(x) calls:    85
    ∇f(x) calls:   85


In [29]:
precond = zeros(Nψ, Nψ)
coeff0 = randn(Nψ)
precond!(precond, coeff, S, H, X)
P = Preconditioner(precond)

res = Optim.optimize(Optim.only_fg!(negative_log_likelihood!(S, H, X)), coeff0, Optim.LBFGS(; m = 20, P = Preconditioner(precond)))

 * Status: success

 * Candidate solution
    Final objective value:     1.479295e+00

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 6.21e-05 ≰ 0.0e+00
    |x - x'|/|x'|          = 9.10e-06 ≰ 0.0e+00
    |f(x) - f(x')|         = 7.68e-13 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 5.19e-13 ≰ 0.0e+00
    |g(x)|                 = 3.43e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    8
    f(x) calls:    17
    ∇f(x) calls:   17


In [30]:
Optim.minimizer(res)

14-element Array{Float64,1}:
  0.2042411208812283
 -0.160665456718217
  5.371746068389059
 -0.19909085135432283
  6.822999267856531
 -0.14374191048398882
 -0.3861327468309733
  0.788767701800107
  2.22362622541736
 -0.9700837073808384
  2.7757966850015574
  0.04063103525990578
  1.7876303361723063
 -1.6264347964407893

In [31]:
@btime res = Optim.optimize(Optim.only_fg!(negative_log_likelihood!($S, $H, $X)), $coeff, 
            Optim.LBFGS(; m = 20, P = Preconditioner($precond), precondprep = (P, x) -> Preconditioner(precond!($S, $H, $X)($P.P.data, x))))

  72.319 ms (38554 allocations: 68.59 MiB)


 * Status: success

 * Candidate solution
    Final objective value:     1.479295e+00

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 8.58e-05 ≰ 0.0e+00
    |x - x'|/|x'|          = 1.26e-05 ≰ 0.0e+00
    |f(x) - f(x')|         = 1.74e-12 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.18e-12 ≰ 0.0e+00
    |g(x)|                 = 5.12e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    8
    f(x) calls:    17
    ∇f(x) calls:   17


In [32]:
@btime res = Optim.optimize(Optim.only_fg!(negative_log_likelihood!($S, $H, $X)), $coeff, 
            Optim.LBFGS(P = Preconditioner($precond)))#, precondprep = (P, x) -> Preconditioner(precond!($S, $H, $X)($P.P.data, x))))

  19.128 ms (26245 allocations: 46.64 MiB)


 * Status: success

 * Candidate solution
    Final objective value:     1.479295e+00

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 8.58e-05 ≰ 0.0e+00
    |x - x'|/|x'|          = 1.26e-05 ≰ 0.0e+00
    |f(x) - f(x')|         = 1.74e-12 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.18e-12 ≰ 0.0e+00
    |g(x)|                 = 5.12e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    8
    f(x) calls:    17
    ∇f(x) calls:   17


In [33]:
@btime res = Optim.optimize(Optim.only_fg!(negative_log_likelihood!($S, $H, $X)), $coeff, 
            Optim.LBFGS(; m = 20, P = Preconditioner($precond)))#, precondprep = (P, x) -> Preconditioner(precond!(S, H, X)(P.P.data, x))))

  19.342 ms (26265 allocations: 46.64 MiB)


 * Status: success

 * Candidate solution
    Final objective value:     1.479295e+00

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 8.58e-05 ≰ 0.0e+00
    |x - x'|/|x'|          = 1.26e-05 ≰ 0.0e+00
    |f(x) - f(x')|         = 1.74e-12 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.18e-12 ≰ 0.0e+00
    |g(x)|                 = 5.12e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    8
    f(x) calls:    17
    ∇f(x) calls:   17


In [34]:
@btime res = Optim.optimize(Optim.only_fg!(negative_log_likelihood!($S, $H, $X)), $coeff, 
            Optim.LBFGS(; m = 20, P = Preconditioner($precond)))

  19.304 ms (26265 allocations: 46.64 MiB)


 * Status: success

 * Candidate solution
    Final objective value:     1.479295e+00

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 8.58e-05 ≰ 0.0e+00
    |x - x'|/|x'|          = 1.26e-05 ≰ 0.0e+00
    |f(x) - f(x')|         = 1.74e-12 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.18e-12 ≰ 0.0e+00
    |g(x)|                 = 5.12e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    8
    f(x) calls:    17
    ∇f(x) calls:   17


In [39]:
diagprecond = zeros(Nψ)
coeff0 = randn(Nψ)
diagprecond!(diagprecond, coeff, S, H, X)
D = Optim.InverseDiagonal(diagprecond)

ldiv!(out::Array, P::Optim.InverseDiagonal, A::Array) = copyto!(out, A .* P.diag)
dot(A::Array, P::Optim.InverseDiagonal, B::Vector) = dot(A, B ./ P.diag)

dot (generic function with 40 methods)

In [41]:
diagprecond

14-element Array{Float64,1}:
 0.00012403916115351907
 0.000914061082337064
 4.223509631951479e-5
 0.0010497362861250537
 9.846338287295053e-5
 0.0023101801954228643
 0.006057967285600174
 0.000385561839624177
 0.0005285393135169043
 0.003055127424459309
 6.995783516676906e-5
 0.003976096151961689
 0.0004067998335964076
 0.000124219015787626

In [44]:
res = Optim.optimize(Optim.only_fg!(negative_log_likelihood!(S, H, X)), coeff, 
            Optim.LBFGS(; m = 20, P = Diagonal(diagprecond)))

 * Status: success

 * Candidate solution
    Final objective value:     1.479295e+00

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 6.88e-05 ≰ 0.0e+00
    |x - x'|/|x'|          = 1.01e-05 ≰ 0.0e+00
    |f(x) - f(x')|         = 8.34e-13 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 5.63e-13 ≰ 0.0e+00
    |g(x)|                 = 9.72e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    11
    f(x) calls:    23
    ∇f(x) calls:   23


In [45]:
@btime res = Optim.optimize(Optim.only_fg!(negative_log_likelihood!($S, $H, $X)), $coeff, 
            Optim.LBFGS(; m = 20, P = Diagonal($diagprecond)))

  25.826 ms (35493 allocations: 63.09 MiB)


 * Status: success

 * Candidate solution
    Final objective value:     1.479295e+00

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 6.88e-05 ≰ 0.0e+00
    |x - x'|/|x'|          = 1.01e-05 ≰ 0.0e+00
    |f(x) - f(x')|         = 8.34e-13 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 5.63e-13 ≰ 0.0e+00
    |g(x)|                 = 9.72e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    11
    f(x) calls:    23
    ∇f(x) calls:   23


In [37]:
res = Optim.optimize(Optim.only_fg!(negative_log_likelihood!(S, H, X)), zeros(Nψ), Optim.NGMRES())

 * Status: success

 * Candidate solution
    Final objective value:     1.479295e+00

 * Found with
    Algorithm:     Nonlinear GMRES preconditioned with Gradient Descent

 * Convergence measures
    |x - x'|               = 3.87e-05 ≰ 0.0e+00
    |x - x'|/|x'|          = 5.68e-06 ≰ 0.0e+00
    |f(x) - f(x')|         = 2.85e-13 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.93e-13 ≰ 0.0e+00
    |g(x)|                 = 5.95e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   1  (vs limit Inf)
    Iterations:    32
    f(x) calls:    123
    ∇f(x) calls:   123


In [128]:
@btime res = Optim.optimize(Optim.only_fg!(negative_log_likelihood!($S, $H, $X)), $coeff, Optim.AcceleratedGradientDescent())

  3.519 s (3527117 allocations: 6.96 GiB)


 * Status: failure (reached maximum number of iterations)

 * Candidate solution
    Final objective value:     1.572975e+00

 * Found with
    Algorithm:     Accelerated Gradient Descent

 * Convergence measures
    |x - x'|               = 4.30e-04 ≰ 0.0e+00
    |x - x'|/|x'|          = 1.07e-04 ≰ 0.0e+00
    |f(x) - f(x')|         = 2.20e-09 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.40e-09 ≰ 0.0e+00
    |g(x)|                 = 1.35e-05 ≰ 1.0e-08

 * Work counters
    Seconds run:   4  (vs limit Inf)
    Iterations:    1000
    f(x) calls:    3295
    ∇f(x) calls:   3295


In [None]:
MomentumGradientDescent()

In [30]:
X = randn(Nx, Ne) .* randn(Nx, Ne) 
S = Storage(H.I.f, X);

J = 0.0
dJ = zeros(Nψ)
d2J = zeros(Nψ, Nψ)
hess_negative_log_likelihood!(J, dJ, d2J, coeff, S, H, X)

1.3784285990365932

In [117]:
dJ

11-element Array{Float64,1}:
 -0.647941708993688
  0.01751723817324542
  0.12943970119038775
  0.0033091844731214684
 -0.00031264752126695754
  0.03545495128594644
 -0.0062483204244866645
 -0.028569877746525366
 -0.0008423358431159828
 -0.0033284486942029698
 -0.005733695198669693

In [31]:
@btime res = Optim.optimize(Optim.only_fgh!(hess_negative_log_likelihood!($S, $H, $X)), $coeff, Optim.NewtonTrustRegion())
# coeffopt = Optim.minimizer(res)

  193.793 ms (82746 allocations: 143.66 MiB)


 * Status: success

 * Candidate solution
    Final objective value:     1.341428e+00

 * Found with
    Algorithm:     Newton's Method (Trust Region)

 * Convergence measures
    |x - x'|               = 1.16e-05 ≰ 0.0e+00
    |x - x'|/|x'|          = 1.01e-06 ≰ 0.0e+00
    |f(x) - f(x')|         = 1.38e-13 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 1.03e-13 ≰ 0.0e+00
    |g(x)|                 = 2.62e-09 ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    28
    f(x) calls:    29
    ∇f(x) calls:   29
    ∇²f(x) calls:  23


In [40]:
@time res = Optim.optimize(Optim.only_fgh!(hess_negative_log_likelihood!(S, H, X)), coeff, Optim.NewtonTrustRegion())

  0.025023 seconds (17.23 k allocations: 44.434 MiB, 25.59% gc time)


 * Status: success

 * Candidate solution
    Final objective value:     1.450904e+00

 * Found with
    Algorithm:     Newton's Method (Trust Region)

 * Convergence measures
    |x - x'|               = 3.17e-04 ≰ 0.0e+00
    |x - x'|/|x'|          = 2.29e-04 ≰ 0.0e+00
    |f(x) - f(x')|         = 1.02e-11 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 7.03e-12 ≰ 0.0e+00
    |g(x)|                 = 1.89e-13 ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    4
    f(x) calls:    5
    ∇f(x) calls:   5
    ∇²f(x) calls:  4


In [121]:
res = Optim.optimize(Optim.only_fg!(negative_log_likelihood!(S, H, X)), coeff, Optim.LBFGS())
# coeffopt = Optim.minimizer(res)

 * Status: success

 * Candidate solution
    Final objective value:     1.298813e+00

 * Found with
    Algorithm:     L-BFGS

 * Convergence measures
    |x - x'|               = 4.47e-06 ≰ 0.0e+00
    |x - x'|/|x'|          = 4.06e-07 ≰ 0.0e+00
    |f(x) - f(x')|         = 5.15e-14 ≰ 0.0e+00
    |f(x) - f(x')|/|f(x')| = 3.97e-14 ≰ 0.0e+00
    |g(x)|                 = 6.45e-10 ≤ 1.0e-08

 * Work counters
    Seconds run:   0  (vs limit Inf)
    Iterations:    19
    f(x) calls:    58
    ∇f(x) calls:   58


In [68]:
@time negative_log_likelihood!(J, dJ, coeff, S, H, X)

  0.000169 seconds (793 allocations: 47.328 KiB)


2.137129544913297