In [1]:
using Pkg
Pkg.activate("/media/mat/HDD/AdaptiveTransportMap/")

[32m[1m Activating[22m[39m environment at `/media/mat/HDD/AdaptiveTransportMap/Project.toml`


In [2]:
using Revise
using AdaptiveTransportMap
using LinearAlgebra
import LinearAlgebra: ldiv!, dot
using Test
using ForwardDiff
using SpecialFunctions
using BenchmarkTools
using TransportMap
using QuadGK
using AdaptiveTransportMap: derivative, vander, transform!, evaluate, ncoeff, optimize
using DiffResults
using Distributions
using Random
using LoopVectorization
using Optim
using NLsolve
using Polynomials
using Optim: InverseDiagonal
using MLDataUtils
using Test

┌ Info: Precompiling AdaptiveTransportMap [bdf749b0-1400-4207-80d3-e689c0e3f03d]
└ @ Base loading.jl:1260


### Expanded function

In [173]:
Nx = 3
Ne = 500

X = Matrix([-0.899679   0.149212  -0.203239;
     -1.20342   -0.196523  -0.484925;
     -1.03261   -1.04501    0.367875;
     -0.343662   0.424117   0.720608;
      0.32033    0.345181   0.739602;
      1.0377    -1.029     -0.610308;
      0.36607    2.42988   -0.0927185;
     -1.15715    0.803241   1.71867]')

X = randn(Nx, Ne)

B = MultiBasis(CstProHermite(3; scaled =true), Nx)

idx = [0 0 0; 0 0 1; 0 1 0; 0 1 1; 0 1 2; 1 0 0]

Nψ = size(idx,1)
coeff =  randn(Nψ)

f = ExpandedFunction(B, idx, coeff)

C = MapComponent(f)

out = zeros(Ne, Nψ);
cache = zeros(Ne, maximum(idx[:,end])+1);

S = Storage(ParametricFunction(f), X);
xlast = view(X, Nx, :);

In [174]:
methods(negative_log_likelihood!)

In [188]:
J = 0.0
dJ = zero(coeff)
@time negativeloglikelihood!(J, dJ, coeff, S, C, X)

  0.000681 seconds (274 allocations: 495.125 KiB)


0.7795392540533528

### HermiteMap

In [199]:
Nx = 20
Ne = 300
m = 10
X = randn(Nx, Ne) .* randn(Nx, Ne) .+ rand(Nx);
X0 = deepcopy(X);

In [200]:
M = HermiteMap(m, X);

In [201]:
optimize(M, X, 6; apply_rescaling = true);

In [210]:
function timing()
#     @btime begin
#         Nx = 10
#         Ne = 300
#         m = 10
#         X = randn(Nx, Ne) .* randn(Nx, Ne) .+ rand(Nx);
#         X0 = deepcopy(X);   
# #         M = HermiteMap(m, X);    
# #         optimize(M, X, 4; apply_rescaling = true)
#     end
    
    @btime begin
        Nx = 30
        Ne = 500
        m = 10
        X = randn(Nx, Ne) .* randn(Nx, Ne) .+ rand(Nx);
        X0 = deepcopy(X);   
        M = HermiteMap(m, X);    
        optimize(M, X, 10; apply_rescaling = true)
    end
end

timing (generic function with 1 method)

In [212]:
timing();

  2.350 s (1843817 allocations: 1.78 GiB)


In [None]:
timing();

In [21]:
timing();

  35.510 μs (11 allocations: 94.58 KiB)
  977.112 ms (1135599 allocations: 1.88 GiB)


In [137]:
function operation!(out, a, B)
    Nl = size(a,1)
    NlB, Nc = size(B)
    # Nl and NlB should match
    
    out .= reshape(a .* B, (Nl*Nc))    
end

operation! (generic function with 1 method)

In [143]:
function operation_avx!(out, a, B)
    Nl = size(a,1)
    NlB, Nc = size(B)
    # Nl and NlB should match
    @avx for j=1:Nc
        for i=1:Nl
            out[(j-1)*Nl+i] = a[i]*B[i,j]
        end
    end
end

operation_avx! (generic function with 1 method)

In [145]:
Nl = 5000
Nc = 100

a = randn(Nl)
B = randn(Nl, Nc)
out = zeros(Nl*Nc)

@btime operation!($out, $a, $B)

@btime operation_avx!($out, $a, $B)

  559.556 μs (4 allocations: 3.81 MiB)
  152.427 μs (0 allocations: 0 bytes)


### Greedyfit

In [147]:
Nx = 10
Ne = 300
m = 20

# X = Matrix([0.267333   1.43021;
#       0.364979   0.607224;
#      -1.23693    0.249277;
#      -2.0526     0.915629;
#      -0.182465   0.415874;
#       0.412907   1.01672;
#       1.41332   -0.918205;
#       0.766647  -1.00445]');

X = randn(Nx, Ne) .* randn(Nx, Ne) .+ rand(Nx)
L = LinearTransform(X; diag = true)

transform!(L, X)

10×300 Array{Float64,2}:
 -0.510134  -0.562906      0.207835   …   0.603149   -0.135178   -2.06874
  0.563244   0.270002      0.0730356      0.183428   -1.98688     1.49501
 -0.580729  -0.0807876    -1.36437        3.19454     1.0868      0.127014
  0.223594  -0.505226      0.11727       -0.0820303   0.683815    0.444308
  0.766521  -0.610468      0.794075       0.459418   -0.541611    0.137747
 -0.631377   0.000790393  -0.736893   …   0.33681     0.0761422   0.108859
  0.377613  -0.123608      0.0403368     -0.17197    -1.24888    -0.0448311
 -2.72811    0.0854831     0.364345       0.334086   -0.332072    1.4834
 -0.158118  -2.01583       1.07898        0.394449    0.11052     0.541683
  0.407627  -1.71966       0.492976      -0.806238   -0.805357    0.507764

In [148]:
@btime greedyfit($m, $Nx, $X, $8; verbose = false);

  19.438 ms (22481 allocations: 14.75 MiB)


In [33]:
@btime greedyfit($m, $Nx, $X, $8; verbose = false);

  17.678 ms (31741 allocations: 42.76 MiB)


In [30]:
@time greedyfit(m, Nx, X, 8; verbose = false);

  0.045642 seconds (41.58 k allocations: 64.954 MiB, 16.33% gc time)


In [34]:
@time greedyfit(m, Nx, X, 8; verbose = false);

  0.044889 seconds (41.58 k allocations: 64.954 MiB, 12.20% gc time)


In [35]:
@btime greedyfit($m, $Nx, $X, $8; verbose = false);

  29.638 ms (41583 allocations: 64.95 MiB)
