In [8]:
using Random, NPZ, Statistics
using MatrixFreeNewton

## [Michalewicz Function](https://www.sfu.ca/~ssurjano/michal.html)


$f(x) = -\sum_{i=1}^{n}\sin(x_i)\sin^{2m}\left(\frac{ix_i^2}{\pi}\right)$, where $n$ is the dimension of $x$ and the code below uses $m = 10$ (exponent $2m = 20$).

In [9]:
"""
    michalewicz(x)

Michalewicz test objective: the negated sum, over 1-based positions `i`, of
`sin(x[i]) * sin(i * x[i]^2 / pi)^20`. Returns `zero(T)` when `x` is empty.
"""
function michalewicz(x::AbstractVector{T}) where T
    total = zero(T)
    for i in 1:length(x)
        xi = x[i]
        # Position-dependent oscillation; the fixed 20th power sharpens the valleys.
        oscillation = sin(i * xi^2 / pi)
        total -= sin(xi) * oscillation^20
    end
    return total
end

# Problem dimension (length of the random initial guess).
n = 100
# Iteration budget passed to every optimizer.
iterations = 100
# Half the problem dimension, rounded down.
lrsfn_rank = fld(n, 2)

# One logger dictionary per optimizer, keyed by the integer RNG seed of the run.
loggers_gd = Dict{Int,Any}()
loggers_csgd = Dict{Int,Any}()
loggers_newton = Dict{Int,Any}()
loggers_sfn = Dict{Int,Any}()

# Run every optimizer from the same seeded random initial guess.
# NOTE(review): this sweep uses seeds 0:10 (11 runs) while the reduced-rank
# sweep further down uses 1:10 (10 runs) -- confirm the mismatch is intended.
for seed in 0:10
    rng = Random.MersenneTwister(seed)
    x_0 = randn(rng, n)

    # Only the logger of each run is kept; the returned solution is unused, so
    # it is discarded instead of being bound to a (previously mislabelled) name.
    println("Now for gradient descent ")
    _, loggers_gd[seed] = gradientDescent(michalewicz, x_0, iterations = iterations)

    println("Now for curvature gradient descent ")
    _, loggers_csgd[seed] = curvatureScaledGradientDescent(michalewicz, x_0,
                                                           iterations = iterations)

    println("Now for Newton ")
    _, loggers_newton[seed] = fullNewton(michalewicz, x_0, alpha = 1e0,
                                         iterations = iterations)

    # rank = n requests the full-rank Hessian in the low-rank solver.
    println("Now for low rank SFNewton with full rank Hessian ")
    _, loggers_sfn[seed] = lowRankSaddleFreeNewton(michalewicz, x_0,
                                                   printing_frequency = iterations,
                                                   iterations = iterations, rank = n)
end

# Loggers of the reduced-rank runs, keyed first by Hessian rank and then by seed.
loggers_lrsfn = Dict{Int,Dict{Int,Any}}()

for lrsfn_rank in 10:10:90
    rank_loggers = Dict{Int,Any}()
    loggers_lrsfn[lrsfn_rank] = rank_loggers
    for seed in 1:10
        rng = Random.MersenneTwister(seed)
        x_0 = randn(rng, n)
        println("Now for low rank SFNewton with reduced Hessian with LRSFN rank = ",
                lrsfn_rank)
        # Only the logger is kept; the returned solution is unused and discarded.
        _, rank_loggers[seed] = lowRankSaddleFreeNewton(michalewicz, x_0, alpha = 1e0,
                                                        printing_frequency = iterations,
                                                        gamma = 1e-3,
                                                        iterations = iterations,
                                                        rank = lrsfn_rank,
                                                        log_full_spectrum = true)
    end
end
println("Done")

Now for gradient descent 
At initial guess obj = 1.2738719186494138
At iteration 10 obj = 0.5187410510395974
At iteration 20 obj = -0.3910790246253262
At iteration 30 obj = 0.0009934499118153667
At iteration 40 obj = 0.49041067574464725
At iteration 50 obj = 0.05571698805991789
At iteration 60 obj = 1.9908358625402117e-6
At iteration 70 obj = 0.9681793350161574
At iteration 80 obj = 8.97830597006094e-7
At iteration 90 obj = 4.5604440761086756e-7
At iteration 100 obj = 4.5058789847964395e-8
Now for curvature gradient descent 
At initial guess obj = 1.2738719186494138
At iteration 10 obj = -1.8533345119683688
At iteration 20 obj = -2.6807745711724866
At iteration 30 obj = -3.089575844447154
At iteration 40 obj = -3.3912345696485806
At iteration 50 obj = -3.638491668487654
At iteration 60 obj = -3.879215082934846
At iteration 70 obj = -4.522816683865038
At iteration 80 obj = -5.098679924941098
At iteration 90 obj = -5.21941275236018
At iteration 100 obj = -5.325060427691334
Now for Newton

At iteration 100 obj = -23.067392604326034 lambda_1 = 188983.13747211822 lambda_r = -3.167511117494658e-28
Now for gradient descent 
At initial guess obj = 3.095148222580407
At iteration 10 obj = 0.19995730900128048
At iteration 20 obj = -0.9511010177198432
At iteration 30 obj = -0.36222523884870833
At iteration 40 obj = -0.005221520145194147
At iteration 50 obj = 1.6902324399968611
At iteration 60 obj = -3.4593769196918477e-6
At iteration 70 obj = -0.09589860482951626
At iteration 80 obj = 3.739292099748788e-5
At iteration 90 obj = 8.947141674611137e-7
At iteration 100 obj = 5.663353308811135e-7
Now for curvature gradient descent 
At initial guess obj = 3.095148222580407
At iteration 10 obj = -3.092492789052139
At iteration 20 obj = -3.9738260520045263
At iteration 30 obj = -4.520904595410221
At iteration 40 obj = -4.888496249560576
At iteration 50 obj = -5.160564418299433
At iteration 60 obj = -5.390776894447083
At iteration 70 obj = -5.6130313471014786
At iteration 80 obj = -5.84494

At iteration 100 obj = -25.74657995396017 lambda_1 = 213177.35767624652 lambda_r = -3.985002173813758e-23
Now for gradient descent 
At initial guess obj = 0.27364921707816936
At iteration 10 obj = 1.9268658214235037
At iteration 20 obj = 0.2502460583542178
At iteration 30 obj = 0.00211044856375353
At iteration 40 obj = -0.15002109601730834
At iteration 50 obj = 0.004143791440260837
At iteration 60 obj = 4.314265251756859e-7
At iteration 70 obj = 4.6403423879778674e-7
At iteration 80 obj = 3.4110955633476075e-7
At iteration 90 obj = 1.8813264567644881e-7
At iteration 100 obj = -6.940385424192136e-8
Now for curvature gradient descent 
At initial guess obj = 0.27364921707816936
At iteration 10 obj = -3.326482838048159
At iteration 20 obj = -3.8859526163295914
At iteration 30 obj = -4.667992276344104
At iteration 40 obj = -5.007012496992183
At iteration 50 obj = -5.172265462335203
At iteration 60 obj = -5.307076510031584
At iteration 70 obj = -5.416810463377193
At iteration 80 obj = -5.504

Now for low rank SFNewton with reduced Hessian with LRSFN rank = 70
At initial guess obj = 1.9359872845877935

At iteration 100 obj = -21.044934238313086 lambda_1 = 122638.80782703806 lambda_r = 2.7494510460688453e-7
Now for low rank SFNewton with reduced Hessian with LRSFN rank = 70
At initial guess obj = -2.019069886261497

At iteration 100 obj = -24.378375005583898 lambda_1 = 213177.35767624658 lambda_r = -3.220386504694196e-8
Now for low rank SFNewton with reduced Hessian with LRSFN rank = 70
At initial guess obj = 0.27364921707816936

At iteration 100 obj = -18.574498994960404 lambda_1 = 110994.89320155956 lambda_r = 3.2052453092033304e-7
Now for low rank SFNewton with reduced Hessian with LRSFN rank = 80
At initial guess obj = -2.0957249151863446

At iteration 100 obj = -21.70910587266786 lambda_1 = 727407.8516069273 lambda_r = 5.521789105675854e-12
Now for low rank SFNewton with reduced Hessian with LRSFN rank = 80
At initial guess obj = -1.9883399436846045

At iteration 100 obj

In [11]:
# Save data for post-processing
data_dir = "michalewicz_data/"
# `mkpath` is a no-op when the directory already exists, so no separate
# existence check is needed.
mkpath(data_dir)
problem_name = "michalewicz_d="*string(n)
optimizers = ["gd","csgd","newton","sfn"]
logger_dicts = [loggers_gd,loggers_csgd,loggers_newton,loggers_sfn]

# For each optimizer: write one file set per seed, then summarise the best
# (minimum) loss reached across seeds.
for (optimizer, logger_dict) in zip(optimizers, logger_dicts)
    println("optimizer = ", optimizer)
    opt_losses = Float64[]
    for (seed, logger) in logger_dict
        name = problem_name*optimizer*"_"*string(seed)
        println("name = ", name)
        # Save losses
        npzwrite(joinpath(data_dir, name*"_losses.npy"), logger.losses)
        push!(opt_losses, minimum(logger.losses))
        # If sfn save spectrum:
        if optimizer == "sfn"
            npzwrite(joinpath(data_dir, name*"_spectra.npy"), logger.spectra)
        end
        # If csgd save alphas
        if optimizer == "csgd"
            npzwrite(joinpath(data_dir, name*"_alphas.npy"), logger.alphas)
        end
    end
    # `minimum` throws on an empty collection; skip the summary when no runs
    # were recorded for this optimizer.
    if isempty(opt_losses)
        println("No runs recorded for optimizer = ", optimizer)
        continue
    end
    println("Min min loss = ", minimum(opt_losses))
    println("Avg min loss = ", Statistics.mean(opt_losses))
    # corrected = false: normalise by the number of runs (population std).
    println("Std min loss = ", Statistics.std(opt_losses, corrected = false))
end

optimizer = "lrsfn"
# For each Hessian rank: write losses and spectra per seed, then summarise the
# best (minimum) loss reached across seeds.
for (lrsfn_rank, rank_logger) in loggers_lrsfn
    # Dict iteration order is not sorted, so label each summary with its rank.
    println("rank = ", lrsfn_rank)
    opt_losses = Float64[]
    for (seed, logger) in rank_logger
        name = problem_name*optimizer*"_"*string(seed)
        name *= "rank_"*string(logger.rank)
        println("name = ", name)
        npzwrite(joinpath(data_dir, name*"_losses.npy"), logger.losses)
        push!(opt_losses, minimum(logger.losses))
        npzwrite(joinpath(data_dir, name*"_spectra.npy"), logger.spectra)
    end
    # `minimum` throws on an empty collection; skip the summary when no runs
    # were recorded for this rank.
    if isempty(opt_losses)
        println("No runs recorded for rank = ", lrsfn_rank)
        continue
    end
    println("Min min loss = ", minimum(opt_losses))
    println("Avg min loss = ", Statistics.mean(opt_losses))
    # corrected = false: normalise by the number of runs (population std).
    println("Std min loss = ", Statistics.std(opt_losses, corrected = false))
end


optimizer = gd
name = michalewicz_d=100gd_5
name = michalewicz_d=100gd_8
name = michalewicz_d=100gd_1
name = michalewicz_d=100gd_0
name = michalewicz_d=100gd_6
name = michalewicz_d=100gd_9
name = michalewicz_d=100gd_3
name = michalewicz_d=100gd_7
name = michalewicz_d=100gd_4
name = michalewicz_d=100gd_2
name = michalewicz_d=100gd_10
Min min loss = -7.6749851250152945
Avg min loss = -2.788840504311165
Std min loss = 1.6580064448096508
optimizer = csgd
name = michalewicz_d=100csgd_5
name = michalewicz_d=100csgd_8
name = michalewicz_d=100csgd_1
name = michalewicz_d=100csgd_0
name = michalewicz_d=100csgd_6
name = michalewicz_d=100csgd_9
name = michalewicz_d=100csgd_3
name = michalewicz_d=100csgd_7
name = michalewicz_d=100csgd_4
name = michalewicz_d=100csgd_2
name = michalewicz_d=100csgd_10
Min min loss = -12.407819743605963
Avg min loss = -8.210897938733444
Std min loss = 2.0911637915442123
optimizer = newton
name = michalewicz_d=100newton_5
name = michalewicz_d=100newton_8
name = michalew