In [1]:
using Random, NPZ, Statistics
using MatrixFreeNewton

## [Michalewicz Function](https://www.sfu.ca/~ssurjano/michal.html)


$f(x) = -\sum_{i=1}^{n}\sin(x_i)\sin^{2m}\left(\frac{ix_i^2}{\pi}\right)$, where the implementation below uses $m = 10$ (i.e. an exponent of $2m = 20$).

In [2]:
"""
    michalewicz(x::AbstractVector; m::Integer=10)

Evaluate the Michalewicz test function

    f(x) = -sum_i sin(x_i) * sin(i * x_i^2 / pi)^(2m)

where `i` is the 1-based position of `x_i` within `x`.

# Arguments
- `x`: point at which the function is evaluated.

# Keywords
- `m`: steepness parameter; the inner sine is raised to the power `2m`. The default
  `m = 10` gives the exponent 20 that was previously hard-coded.

# Returns
The scalar function value; `zero(T)` when `x` is empty.
"""
function michalewicz(x::AbstractVector{T}; m::Integer=10) where T
    power = 2 * m
    f = zero(T)
    # `enumerate` pairs each element with its 1-based position, which is what the
    # formula needs, without assuming how `x` itself is indexed.
    for (i, xi) in enumerate(x)
        f -= sin(xi) * sin((i * xi^2) / pi)^power
    end
    return f
end

# Experiment configuration.
n = 100                            # length of the random starting vector
iterations = 100                   # handed to every optimizer call below
lrsfn_rank = floor(Int, 0.5 * n)   # half of n, rounded down, as an Int

# One dictionary per optimizer; the seed loop below stores `seed => logger` in each.
loggers_gd     = Dict{Any,Any}()
loggers_csgd   = Dict{Any,Any}()
loggers_newton = Dict{Any,Any}()
loggers_sfn    = Dict{Any,Any}()
loggers_lrsfn  = Dict{Any,Any}()

# Seed sweep: for each seed in 0:10 draw one Gaussian starting vector of length `n`
# and pass that same vector to every optimizer in turn. Only the logger returned by
# each call is kept (keyed by seed); the first returned value is discarded.
for seed in 0:10
    rng = MersenneTwister(seed)
    x_init = randn(rng, n)

    println("Now for gradient descent ")
    _, run_log = gradientDescent(michalewicz, x_init; iterations=iterations)
    loggers_gd[seed] = run_log

    println("Now for curvature gradient descent ")
    _, run_log = curvatureScaledGradientDescent(michalewicz, x_init; iterations=iterations)
    loggers_csgd[seed] = run_log

    print("Now for Newton \n")
    _, run_log = fullNewton(michalewicz, x_init; alpha=1e0, iterations=iterations)
    loggers_newton[seed] = run_log

    # Same solver as the reduced-rank run below, but with rank equal to the dimension.
    println("Now for low rank SFNewton with full rank Hessian ")
    _, run_log = lowRankSaddleFreeNewton(
        michalewicz, x_init;
        printing_frequency=iterations,
        iterations=iterations,
        rank=n,
    )
    loggers_sfn[seed] = run_log

    println("Now for low rank SFNewton with reduced Hessian with LRSFN rank = ",lrsfn_rank)
    _, run_log = lowRankSaddleFreeNewton(
        michalewicz, x_init;
        alpha=1e0,
        printing_frequency=iterations,
        gamma=1e-3,
        iterations=iterations,
        rank=lrsfn_rank,
        log_full_spectrum=true,
    )
    loggers_lrsfn[seed] = run_log

end
println("Done")

Now for gradient descent 
At initial guess obj = 1.2738719186494138
At iteration 10 obj = 0.5187410510395974
At iteration 20 obj = -0.3910790246253262
At iteration 30 obj = 0.0009934499118153667
At iteration 40 obj = 0.49041067574464725
At iteration 50 obj = 0.05571698805991789
At iteration 60 obj = 1.9908358625402117e-6
At iteration 70 obj = 0.9681793350161574
At iteration 80 obj = 8.97830597006094e-7
At iteration 90 obj = 4.5604440761086756e-7
At iteration 100 obj = 4.5058789847964395e-8
Now for curvature gradient descent 
At initial guess obj = 1.2738719186494138
At iteration 10 obj = -1.8533345119683688
At iteration 20 obj = -2.6807745711724866
At iteration 30 obj = -3.089575844447154
At iteration 40 obj = -3.3912345696485806
At iteration 50 obj = -3.638491668487654
At iteration 60 obj = -3.879215082934846
At iteration 70 obj = -4.522816683865038
At iteration 80 obj = -5.098679924941098
At iteration 90 obj = -5.21941275236018
At iteration 100 obj = -5.325060427691334
Now for Newton

At iteration 50 obj = -8.453233351022462
At iteration 60 obj = -8.695592038099223
At iteration 70 obj = -8.910889597407724
At iteration 80 obj = -9.36366601428388
At iteration 90 obj = -10.122345951827352
At iteration 100 obj = -10.476145628163472
Now for Newton 
At initial guess obj = -1.0111784947125053
At iteration 10 obj = -1.462712256230977
At iteration 20 obj = -1.4627091980760167
At iteration 30 obj = -1.462709198020652
At iteration 40 obj = -1.4627091980206508
At iteration 50 obj = -1.4627091980206508
At iteration 60 obj = -1.4627091980206508
At iteration 70 obj = -1.4627091980206508
At iteration 80 obj = -1.4627091980206508
At iteration 90 obj = -1.4627091980206508
At iteration 100 obj = -1.4627091980206508
Now for low rank SFNewton with full rank Hessian 
At initial guess obj = -1.0111784947125053

At iteration 100 obj = -23.312915540910037 lambda_1 = 185248.0850793158 lambda_r = -1.7886942646779826e-27
Now for low rank SFNewton with reduced Hessian with LRSFN rank = 50
At in

At iteration 100 obj = -22.800165117618423 lambda_1 = 122638.80782703913 lambda_r = -8.955525119382186e-28
Now for low rank SFNewton with reduced Hessian with LRSFN rank = 50
At initial guess obj = 1.9359872845877935

At iteration 100 obj = -19.299038252808064 lambda_1 = 9.62253650682399e9 lambda_r = -2.9721660726830997e-6
Now for gradient descent 
At initial guess obj = -2.019069886261497
At iteration 10 obj = 0.7115135524375567
At iteration 20 obj = 0.0627839693101832
At iteration 30 obj = 0.8379285199849578
At iteration 40 obj = -0.004947943705949767
At iteration 50 obj = 0.0688030934339671
At iteration 60 obj = 0.5123400473692672
At iteration 70 obj = 2.03323050595401e-6
At iteration 80 obj = -2.6911697427710718e-6
At iteration 90 obj = 0.8922387343594104
At iteration 100 obj = 0.05675371360493396
Now for curvature gradient descent 
At initial guess obj = -2.019069886261497
At iteration 10 obj = -7.646856885277599
At iteration 20 obj = -9.710114698491463
At iteration 30 obj = -10.4

In [3]:
# Write every logger's recorded arrays to .npy files for post-processing and print
# summary statistics of the best loss per optimizer.
data_dir = "michalewicz_data/"
isdir(data_dir) || mkdir(data_dir)
problem_name = string("michalewicz_d=", n)
optimizers = ["gd", "csgd", "newton", "sfn", "lrsfn"]
logger_dicts = [loggers_gd, loggers_csgd, loggers_newton, loggers_sfn, loggers_lrsfn]

for (optimizer, logger_dict) in zip(optimizers, logger_dicts)
    println("optimizer = ",optimizer)
    # Smallest loss seen in each seeded run of this optimizer.
    best_losses = Float64[]
    for (seed, logger) in logger_dict
        # File stem: <problem_name><optimizer>_<seed>, plus the rank for lrsfn runs.
        stem = string(problem_name, optimizer, "_", seed)
        if optimizer == "lrsfn"
            stem = string(stem, "rank_", logger.rank)
        end
        println("name = ",stem)

        # Loss history is saved for every optimizer.
        npzwrite(string(data_dir, stem, "_losses.npy"), logger.losses)
        push!(best_losses, minimum(logger.losses))

        # Spectra are saved only for the two saddle-free Newton variants.
        if optimizer == "sfn" || optimizer == "lrsfn"
            npzwrite(string(data_dir, stem, "_spectra.npy"), logger.spectra)
        end

        # The `alphas` field is saved only for csgd.
        if optimizer == "csgd"
            npzwrite(string(data_dir, stem, "_alphas.npy"), logger.alphas)
        end
    end
    println("Min min loss = ",minimum(best_losses))
    println("Avg min loss = ",mean(best_losses))
    # corrected = false: population (divide-by-N) standard deviation.
    println("Std min loss = ",std(best_losses; corrected=false))
end

optimizer = gd
name = michalewicz_d=100gd_5
name = michalewicz_d=100gd_8
name = michalewicz_d=100gd_1
name = michalewicz_d=100gd_0
name = michalewicz_d=100gd_6
name = michalewicz_d=100gd_9
name = michalewicz_d=100gd_3
name = michalewicz_d=100gd_7
name = michalewicz_d=100gd_4
name = michalewicz_d=100gd_2
name = michalewicz_d=100gd_10
Min min loss = -7.6749851250152945
Avg min loss = -2.788840504311165
Std min loss = 1.6580064448096508
optimizer = csgd
name = michalewicz_d=100csgd_5
name = michalewicz_d=100csgd_8
name = michalewicz_d=100csgd_1
name = michalewicz_d=100csgd_0
name = michalewicz_d=100csgd_6
name = michalewicz_d=100csgd_9
name = michalewicz_d=100csgd_3
name = michalewicz_d=100csgd_7
name = michalewicz_d=100csgd_4
name = michalewicz_d=100csgd_2
name = michalewicz_d=100csgd_10
Min min loss = -12.407819743605963
Avg min loss = -8.210897938733444
Std min loss = 2.0911637915442123
optimizer = newton
name = michalewicz_d=100newton_5
name = michalewicz_d=100newton_8
name = michalew