In [1]:
using Random

using ScalableNewton

## [n-dimensional Rosenbrock](https://arxiv.org/pdf/1903.09556.pdf)


$f(x) = \sum_{i=1}^{n-1} \left[100(x_{i+1} - x_i^2)^2 + (1-x_i)^2\right]$

In [2]:
"""
    rosenbrockn(x)

Evaluate the n-dimensional Rosenbrock objective

    sum(100 * (x[i+1] - x[i]^2)^2 + (1 - x[i])^2 for i in 1:n-1)

where `n` is the extent of the first dimension of `x`.

The accumulator starts at `0.0`, so the result is `0.0` whenever `x` has
fewer than two entries (the sum is then empty).
"""
function rosenbrockn(x)
    npairs = size(x)[1] - 1
    total = 0.0
    k = 1
    while k <= npairs
        # Distance of the next coordinate from the parabola through the current one.
        valley = x[k + 1] - x[k]^2
        # Distance of the current coordinate from its optimum at 1.
        offset = 1 - x[k]
        total += 100 * valley^2 + offset^2
        k += 1
    end
    return total
end

# Problem dimension. Every solver below starts from the origin, where each of
# the n - 1 Rosenbrock terms contributes exactly 1 to the objective.
n = 10

print("Now for gradient descent \n")
w_star_gd, logger_gd = gradientDescent(rosenbrockn, zeros(n))

print("Now for curvature gradient descent \n")
# Keep this result under its own name; reusing `w_star_gd` here would discard
# the plain gradient descent iterate.
w_star_curve_gd, logger_curve_gd = curvatureScaledGradientDescent(rosenbrockn, zeros(n))

print("Now for Newton \n")
w_star_newton, logger_full_newton = fullNewton(
    rosenbrockn, zeros(n); alpha=1e0, iterations=2 * n
)



Now for gradient descent 
At initial guess obj = 9.0
At iteration 10 obj = NaN
At iteration 20 obj = NaN
At iteration 30 obj = NaN
At iteration 40 obj = NaN
At iteration 50 obj = NaN
At iteration 60 obj = NaN
At iteration 70 obj = NaN
At iteration 80 obj = NaN
At iteration 90 obj = NaN
At iteration 100 obj = NaN
Now for curvature gradient descent 
At initial guess obj = 9.0
At iteration 10 obj = 8.709303623211357
At iteration 20 obj = 8.487770557748984
At iteration 30 obj = 8.28198689755699
At iteration 40 obj = 8.098128549963612
At iteration 50 obj = 7.933955800888958
At iteration 60 obj = 7.785872991823892
At iteration 70 obj = 7.650753325311406
At iteration 80 obj = 7.52615060336678
At iteration 90 obj = 7.410188188400873
At iteration 100 obj = 7.301422591306192
Now for Newton 
At initial guess obj = 9.0
At iteration 1 obj = 105.93924292104158
At iteration 2 obj = 126.6205786600929
At iteration 3 obj = 182.85400113825946
At iteration 4 obj = 343.6333994179984
At iteration 5 obj = 43

([1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0, 1.0], ScalableNewton.NewtonLogger(1.0, [9.0, 105.93924292104158, 126.6205786600929, 182.85400113825946, 343.6333994179984, 436.3586171045547, 252.56686372653868, 526.0980684641479, 416.6633226446785, 0.7606102988509199  …  0.11005306862251926, 0.7667041586551342, 4.274421151304933e-6, 1.207954020517505e-11, 8.435625581350841e-21, 0.0, 0.0, 0.0, 0.0, 0.0]))

In [3]:
print("Now for low rank SFNewton with full Hessian \n")
# Each run keeps its own solution name; reusing `w_star_newton` would overwrite
# the results of other solver runs.
w_star_full_sfn, logger_full_sfn = lowRankSaddleFreeNewton(
    rosenbrockn, zeros(n); hessian="full", iterations=2 * n, rank=n
)

# NOTE: despite the name and the printed label, this is 90% of n rounded down
# (9 when n = 10), not n / 2.
half_rank = floor(Int, 0.9 * n)
print("Now for low rank SFNewton with reduced Hessian with half rank \n")
println("half rank = ", half_rank)
w_star_lrsfn, logger_lrsfn = lowRankSaddleFreeNewton(
    rosenbrockn, zeros(n);
    hessian="reduced", alpha=1e-4, gamma=1e-2, iterations=100, rank=half_rank,
)

# print("Now for low rank SFNewton with gradient based Hessian \n")
# w_star_newton,trace_newton = lowRankSaddleFreeNewton(rosenbrockn,zeros(n),hessian = "from_gradient",
#                                                             iterations = 2*n,history = false,rank = n)

Now for low rank SFNewton with full Hessian 
At initial guess obj = 9.0

At iteration 1 obj = 105.9372628392383 lambda_1 = 202.0 lambda_r = 2.0
At iteration 2 obj = 126.6157922667713 lambda_1 = 1338.3544908236352 lambda_r = 57.732325573609366
At iteration 3 obj = 182.84013309458913 lambda_1 = 1818.8897958505645 lambda_r = 55.830110187725914
At iteration 4 obj = 343.5834988406558 lambda_1 = 2182.829301281455 lambda_r = 55.70087391595083
At iteration 5 obj = 436.27944038663406 lambda_1 = 2896.510656303326 lambda_r = 56.802027184382474
At iteration 6 obj = 252.46748856700808 lambda_1 = 3556.3555532387154 lambda_r = 60.24289234121143
At iteration 7 obj = 525.7119939514522 lambda_1 = 2522.065207853143 lambda_r = 57.95239903840006
At iteration 8 obj = 416.4757036423344 lambda_1 = 3464.0425800764074 lambda_r = 61.329514050228006
At iteration 9 obj = 0.7598952262399751 lambda_1 = 3737.555982461642 lambda_r = 48.45440082438159
At iteration 10 obj = 3.8112275584127744 lambda_1 = 3007.77195667800

([0.9553351468282614, 0.8983357406612887, 0.7956001838044323, 0.6399839290517407, 0.40947758337521367, 0.19129895026581192, -0.004017688288986293, -0.0010664346611118747, -0.011472740691958668, -0.03101613028905738], ScalableNewton.SFNLogger(0.0001, [9.0, 8.960276216274545, 8.919192368721983, 8.876848393122758, 8.833358222597024, 8.788845645129934, 8.743439783926007, 8.697270694566306, 8.650465526789011, 8.603145572425682  …  5.190257370522069, 5.10096646324127, 5.017596452081462, 4.940154016822082, 4.868490840233724, 4.8023909527510575, 4.741622850096979, 4.685966377884465, 4.635223050636682, 4.589216061322438], [202.0000000000002 202.35845069865906 … 1329.6767933557712 1338.517584087586; 202.00000000000006 202.15809678114442 … 893.2755715841301 904.7062735620372; … ; 201.87747038113054 201.84206921990733 … 198.67914552811405 198.65289317318118; 199.97303379668762 199.97178167168573 … 147.46880753397602 147.41345622085055]))

In [4]:
# Loss history held by the logger returned from the `gradientDescent` run.
logger_gd.losses

101-element Vector{Float64}:
    9.0
 4809.0
    8.255681876505609e15
    5.951969133663163e53
    5.373886071433855e167
   Inf
   Inf
  NaN
  NaN
  NaN
  NaN
  NaN
  NaN
    ⋮
  NaN
  NaN
  NaN
  NaN
  NaN
  NaN
  NaN
  NaN
  NaN
  NaN
  NaN
  NaN

In [5]:
# Loss history held by the logger returned from the reduced-Hessian
# `lowRankSaddleFreeNewton` run.
logger_lrsfn.losses

101-element Vector{Float64}:
 9.0
 8.960276216274545
 8.919192368721983
 8.876848393122758
 8.833358222597024
 8.788845645129934
 8.743439783926007
 8.697270694566306
 8.650465526789011
 8.603145572425682
 8.555424352834542
 8.507406732776987
 8.459188917000901
 ⋮
 5.384989880174876
 5.285178209632333
 5.190257370522069
 5.10096646324127
 5.017596452081462
 4.940154016822082
 4.868490840233724
 4.8023909527510575
 4.741622850096979
 4.685966377884465
 4.635223050636682
 4.589216061322438

In [6]:
# First column of the spectra logged by the reduced-Hessian run; presumably the
# Hessian eigenvalue estimates from the first iteration (TODO confirm against
# the ScalableNewton logger definition).
logger_lrsfn.spectra[:,1]

9-element ElasticArrays.ElasticVector{Float64, Vector{Float64}}:
 202.0000000000002
 202.00000000000006
 202.0
 201.99999999999997
 201.99999999999997
 201.99999999999994
 201.9999999999999
 201.87747038113054
 199.97303379668762