In [1]:
using Random, NPZ, Statistics
using MatrixFreeNewton

## [n-dimensional Rosenbrock](https://arxiv.org/pdf/1903.09556.pdf)
$f(x) = \sum_{i=1}^{n-1} \left[100(x_{i+1} - x_i^2)^2 + (1-x_i)^2\right]$

In [2]:

"""
    rosenbrockn(x)

Evaluate the n-dimensional Rosenbrock function at `x`:

    sum over i = 1, ..., n-1 of 100*(x[i+1] - x[i]^2)^2 + (1 - x[i])^2

where `n` is the first dimension of `x`. When `x` has a single entry the sum is
empty and `0.0` is returned.
"""
function rosenbrockn(x)
    last_pair = size(x)[1] - 1
    total = 0.0
    for k in 1:last_pair
        # Residual coupling consecutive coordinates, and distance of x[k] from 1.
        coupling = x[k+1] - x[k]^2
        offset = 1 - x[k]
        total += 100*coupling^2 + offset^2
    end
    return total
end

# Problem dimension.
n = 10

# Iteration budget for the first-order methods and the reduced-rank run.
iterations = 1000
# Rank used for the reduced-Hessian saddle-free Newton run: 90% of n, rounded down.
lrsfn_rank = floor(Int, 0.9*n)

# One dictionary per optimizer, mapping a seed label to the logger returned by that run.
loggers_gd = Dict{Int,Any}()
loggers_csgd = Dict{Int,Any}()
loggers_newton = Dict{Int,Any}()
loggers_sfn = Dict{Int,Any}()
loggers_lrsfn = Dict{Int,Any}()
# NOTE(review): `seed` is only used as the dictionary key below; this cell never calls
# Random.seed!, so it does not by itself make any randomized solver reproducible.
seed = 0

# Every optimizer starts from the origin.
x_0 = zeros(n)

# Each run keeps its own solution variable so that no result overwrites another.
println("Now for gradient descent ")
w_star_gd, logger_gd = gradientDescent(rosenbrockn, x_0,
                                       alpha = 1e-3, iterations = iterations)
loggers_gd[seed] = logger_gd

println("Now for curvature gradient descent ")
w_star_csgd, logger_csgd = curvatureScaledGradientDescent(rosenbrockn, x_0,
                                                          iterations = iterations)
loggers_csgd[seed] = logger_csgd

println("Now for Newton ")
w_star_newton, logger_newton = fullNewton(rosenbrockn, x_0, alpha = 1e0, iterations = 15)
loggers_newton[seed] = logger_newton

println("Now for low rank SFNewton with full rank Hessian ")
# rank = n requests the full-rank variant. printing_frequency is set to `iterations`
# (1000), larger than the 15 iterations run here — presumably to suppress per-iteration
# progress output; confirm against the MatrixFreeNewton documentation.
w_star_sfn, logger_sfn = lowRankSaddleFreeNewton(rosenbrockn, x_0,
                                                 printing_frequency = iterations,
                                                 iterations = 15, rank = n)
loggers_sfn[seed] = logger_sfn

println("Now for low rank SFNewton with reduced Hessian with LRSFN rank = ",lrsfn_rank)
w_star_lrsfn, logger_lrsfn = lowRankSaddleFreeNewton(rosenbrockn, x_0,
                                                     alpha = 1e-2, printing_frequency = 10,
                                                     gamma = 1e0, iterations = iterations,
                                                     rank = lrsfn_rank,
                                                     log_full_spectrum = true)
loggers_lrsfn[seed] = logger_lrsfn


println("Done")

Now for gradient descent 
At initial guess obj = 9.0
At iteration 10 obj = 8.879948397569317
At iteration 20 obj = 8.836638544438738
At iteration 30 obj = 8.792829881772933
At iteration 40 obj = 8.747652467475021
At iteration 50 obj = 8.701207304568682
At iteration 60 obj = 8.653607539016514
At iteration 70 obj = 8.604965243073693
At iteration 80 obj = 8.555387981171107
At iteration 90 obj = 8.5049759174601
At iteration 100 obj = 8.453819916894021
At iteration 110 obj = 8.402000672461867
At iteration 120 obj = 8.349588714944876
At iteration 130 obj = 8.296645084750546
At iteration 140 obj = 8.243222425775002
At iteration 150 obj = 8.189366280280828
At iteration 160 obj = 8.13511640477129
At iteration 170 obj = 8.080507976231992
At iteration 180 obj = 8.0255726068026
At iteration 190 obj = 7.970339127893842
At iteration 200 obj = 7.914834139747103
At iteration 210 obj = 7.859082348791283
At iteration 220 obj = 7.803106732974198
At iteration 230 obj = 7.746928584977876
At iteration 240 o

Now for Newton 
At initial guess obj = 9.0
At iteration 10 obj = 3.8082531610035275
Now for low rank SFNewton with full rank Hessian 
At initial guess obj = 9.0

Now for low rank SFNewton with reduced Hessian with LRSFN rank = 9
At initial guess obj = 9.0

At iteration 10 obj = 8.53919080043249 lambda_1 = 235.18211712553307 lambda_r = 196.83222513559045
At iteration 20 obj = 8.0320541904522 lambda_1 = 343.79122426975437 lambda_r = 188.142886904825
At iteration 30 obj = 7.53337543782478 lambda_1 = 493.60259024185143 lambda_r = 179.1341064952491
At iteration 40 obj = 7.054199349381285 lambda_1 = 653.6777069793687 lambda_r = 169.3101735977203
At iteration 50 obj = 6.582108893163216 lambda_1 = 809.1400447798992 lambda_r = 159.55467639152056
At iteration 60 obj = 6.108026792185571 lambda_1 = 952.9176460041575 lambda_r = 152.32564098834547
At iteration 70 obj = 5.552165831334911 lambda_1 = 1088.1192205148147 lambda_r = 150.11493570504402
At iteration 80 obj = 4.8626392438198955 lambda_1 = 12

At iteration 860 obj = 0.0014440480698067939 lambda_1 = 1752.6773658716477 lambda_r = 238.43622455100285
At iteration 870 obj = 0.001299493513114118 lambda_1 = 1753.153968467028 lambda_r = 238.58075620860313
At iteration 880 obj = 0.0011697648414931666 lambda_1 = 1753.608812855623 lambda_r = 238.7168600311167
At iteration 890 obj = 0.001053288920593943 lambda_1 = 1754.042858961615 lambda_r = 238.8450918750682
At iteration 900 obj = 0.0009486673677351408 lambda_1 = 1754.45702427745 lambda_r = 238.96596457052505
At iteration 910 obj = 0.0008546559646534746 lambda_1 = 1754.8521862134476 lambda_r = 239.07995188151546
At iteration 920 obj = 0.0007701466730932202 lambda_1 = 1755.2291842140082 lambda_r = 239.1874920378316
At iteration 930 obj = 0.0006941518971154068 lambda_1 = 1755.5888216750818 lambda_r = 239.28899089164554
At iteration 940 obj = 0.0006257906889141758 lambda_1 = 1755.9318676921425 lambda_r = 239.3848247448513
At iteration 950 obj = 0.0005642766393107311 lambda_1 = 1756.25905

In [3]:
# Save data for post-processing
data_dir = "rosenbrockn_data/"
# mkpath is a no-op when the directory already exists and also creates missing parent
# directories, so no separate isdir check is needed.
mkpath(data_dir)
problem_name = "rosenbrock_d="*string(n)
optimizers = ["gd","csgd","newton","sfn","lrsfn"]
logger_dicts = [loggers_gd,loggers_csgd,loggers_newton,loggers_sfn,loggers_lrsfn]

# For every optimizer, write each run's logged arrays to .npy files and print summary
# statistics of the best loss reached across that optimizer's runs.
for (optimizer,logger_dict) in zip(optimizers,logger_dicts)
    println("optimizer = ",optimizer)
    # Minimum loss of each run of this optimizer.
    opt_losses = Float64[]
    for (seed,logger) in logger_dict
        # File stem: problem name, optimizer and seed (plus the rank for lrsfn).
        name = problem_name*optimizer*"_"*string(seed)
        if optimizer == "lrsfn"
            name *= "rank_"*string(logger.rank)
        end
        println("name = ",name)
        # Save losses
        npzwrite(data_dir*name*"_losses.npy",logger.losses)
        push!(opt_losses,minimum(logger.losses))
        # If sfn save spectrum:
        if optimizer in ("sfn","lrsfn")
            npzwrite(data_dir*name*"_spectra.npy",logger.spectra)
        end
        # If csgd save alphas
        if optimizer == "csgd"
            npzwrite(data_dir*name*"_alphas.npy",logger.alphas)
        end
    end
    println("Min min loss = ",minimum(opt_losses))
    println("Avg min loss = ",Statistics.mean(opt_losses))
    # corrected = false gives the population standard deviation (divides by N), which
    # is 0.0 rather than NaN when there is a single run.
    println("Std min loss = ",Statistics.std(opt_losses,corrected = false))
end

optimizer = gd
name = rosenbrock_d=10gd_0
Min min loss = 3.286912399840308
Avg min loss = 3.286912399840308
Std min loss = 0.0
optimizer = csgd
name = rosenbrock_d=10csgd_0
Min min loss = 2.6554529902106676
Avg min loss = 2.6554529902106676
Std min loss = 0.0
optimizer = newton
name = rosenbrock_d=10newton_0
Min min loss = 8.435625581350841e-21
Avg min loss = 8.435625581350841e-21
Std min loss = 0.0
optimizer = sfn
name = rosenbrock_d=10sfn_0
Min min loss = 1.660530177772186e-20
Avg min loss = 1.660530177772186e-20
Std min loss = 0.0
optimizer = lrsfn
name = rosenbrock_d=10lrsfn_0rank_9.0
Min min loss = 0.00033726698662376177
Avg min loss = 0.00033726698662376177
Std min loss = 0.0
