
Commit

continue 0.7 updates
alyst committed Jul 23, 2018
1 parent 7d180e6 commit aa08de7
Showing 55 changed files with 245 additions and 224 deletions.
12 changes: 6 additions & 6 deletions examples/benchmarking/compare_optimizers.jl
@@ -318,15 +318,15 @@ function list_benchmark_db(db, saveResultCsvFile = nothing)
# Get number of runs and median magnitude worse per method
permethod = by(db, [:Method], df -> DataFrame(
NumRuns = size(df, 1),
-MedianLogTimesWorseFitness = round(median(df[:, :LogTimesWorseFitness]), 1),
+MedianLogTimesWorseFitness = round(median(df[:, :LogTimesWorseFitness]), digits=1),
)
)

# and merge with table with mean ranks of fitness and time.
summarydf = by(sumdf, [:Method], df -> DataFrame(
-MeanRank = round(mean(df[:,:RankFitness]), 3),
+MeanRank = round(mean(df[:,:RankFitness]), digits=3),
Num1sFitness = sum(map(r -> (r == 1) ? 1 : 0, df[:,:RankFitness])),
-MeanRankTime = round(mean(df[:,:RankTime]), 3),
+MeanRankTime = round(mean(df[:,:RankTime]), digits=3),
Num1sTime = sum(map(r -> (r == 1) ? 1 : 0, df[:,:RankTime])),
)
)
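
The round(...) changes above are the core Julia 0.7 migration in this file: the digit count moved to the digits keyword. A minimal illustration:

# round(3.14159, 2)       # Julia 0.6 style; deprecated in 0.7, removed in 1.0
round(3.14159, digits=2)  # Julia 0.7+ style, as used in this commit; returns 3.14
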
@@ -381,7 +381,7 @@ function compare_optimizers_to_benchmarks(benchmarkfile, pset, optimizers, nreps
prob = BlackBoxOptim.example_problems[probname]
for r in 1:nreps
runnum += 1
log("\n$(probname), n = $(numdims), optimizer = $(string(optmethod)), run $(r) of $(nreps) ($(round(100.0 * runnum / totalruns, 2))% of total runs)\n")
log("\n$(probname), n = $(numdims), optimizer = $(string(optmethod)), run $(r) of $(nreps) ($(round(100.0 * runnum / totalruns, digits=2))% of total runs)\n")
ftn = fitness_for_opt(prob, numdims, popsize, ceil(Int, numfevals), optmethod)
push!(newfs, ftn)
end
@@ -426,7 +426,7 @@ function compare_optimizers_to_benchmarks(benchmarkfile, pset, optimizers, nreps
# Report (in color) on number of significant differences after Benjamini-Hochberg
# correction.
color = (sum(df[:SignificantBH005]) > 0) ? :red : :green
-print_with_color(color,
+print_with_color(color,
"\nNum significant at Benjamini-Hochberg 0.05 level: ", string(sum(df[:SignificantBH005])),
"\n")
end
@@ -439,7 +439,7 @@ function benjamini_hochberg(pvals, alpha = 0.05)
perm = sortperm(pvals)
origperm = sortperm(perm)
sortedps = pvals[perm]

tresholds = alpha * collect(1:n) / n
khat = n+1 - findfirst(reverse(sortedps .<= tresholds))
if khat > n
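
As background, the Benjamini-Hochberg step above rejects the khat smallest p-values, where khat is the largest k whose sorted p_(k) <= alpha*k/n. A small worked sketch with hypothetical p-values:

pvals = [0.001, 0.008, 0.039, 0.041, 0.27]  # hypothetical, already sorted
n = length(pvals)
thresholds = 0.05 .* collect(1:n) ./ n      # 0.01, 0.02, 0.03, 0.04, 0.05
pvals .<= thresholds                        # true, true, false, false, false => khat = 2
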
18 changes: 9 additions & 9 deletions examples/ode_lorentz_attractor.jl
@@ -7,12 +7,12 @@ function lorentz_equations(params::Vector{Float64}, r::Vector{Float64})

# Extract the coordinates from the r vector
x, y, z = r

# The Lorenz equations
dx_dt = sigma*(y - x)
dy_dt = x*(rho - z) - y
dz_dt = x*y - beta*z

# Return the derivatives as a vector
Float64[dx_dt, dy_dt, dz_dt]
end
@@ -34,15 +34,15 @@ t_Xiang2015 = collect(tinterval_Xiang2015)
# Initial position in space
r0 = [0.1; 0.0; 0.0]

-# Constants sigma, rho and beta. In a real ODE problem these would not be known
+# Constants sigma, rho and beta. In a real ODE problem these would not be known
# and would be estimated.
sigma = 10.0
rho = 28.0
beta = 8.0/3.0
real_params = [sigma, rho, beta]

# "Play" an ODE from a starting point and into the future given a sequence of time steps.
-function calc_state_vectors(params::Vector{Float64}, odefunc::Function,
+function calc_state_vectors(params::Vector{Float64}, odefunc::Function,
startx::Vector{Float64}, times::Vector{Float64}; states = nothing)

numsamples = length(times)
@@ -85,7 +85,7 @@ function subsample(origstates::Array{Float64, 2}, times::Vector{Float64}, lenrat
return origstates[:, indexes], times[indexes]
end

-# The [Xiang2015] paper, https://www.hindawi.com/journals/ddns/2015/740721/,
+# The [Xiang2015] paper, https://www.hindawi.com/journals/ddns/2015/740721/,
# used these param bounds:
Xiang2015Bounds = Tuple{Float64, Float64}[(9, 11), (20, 30), (2, 3)]

@@ -102,15 +102,15 @@ end
origstates1, times1 = subsample(origstates, t, 0.04); # Sample only first 4%
origstates1_Xiang2015, times1_Xiang2015 = subsample(origstates_Xiang2015, t_Xiang2015, 1.00);

-res1 = bboptimize(params -> lorentz_fitness(params, origstates1, times1);
+res1 = bboptimize(params -> lorentz_fitness(params, origstates1, times1);
SearchRange = Xiang2015Bounds, MaxSteps = 8e3)

-res2 = bboptimize(params -> lorentz_fitness(params, origstates1_Xiang2015, times1_Xiang2015);
+res2 = bboptimize(params -> lorentz_fitness(params, origstates1_Xiang2015, times1_Xiang2015);
SearchRange = Xiang2015Bounds, MaxSteps = 11e3) # They allow 12k fitness evals for 3-param estimation

# But lets also try with less tight bounds
LooserBounds = Tuple{Float64, Float64}[(0, 22), (0, 60), (1, 6)]
-res3 = bboptimize(params -> lorentz_fitness(params, origstates1_Xiang2015, times1_Xiang2015);
+res3 = bboptimize(params -> lorentz_fitness(params, origstates1_Xiang2015, times1_Xiang2015);
SearchRange = LooserBounds, MaxSteps = 11e3) # They allow 12k fitness evals for 3-param estimation

println("Results on the long time sequence from Paulo Marques:")
@@ -129,4 +129,4 @@ println("Results on the short time sequence used in [Xiang2015] paper, but with
estfitness = lorentz_fitness(best_candidate(res3), origstates_Xiang2015, t_Xiang2015)
@show (estfitness, best_candidate(res3), best_fitness(res3))
origfitness = lorentz_fitness(real_params, origstates_Xiang2015, t_Xiang2015)
-@show (origfitness, real_params)
+@show (origfitness, real_params)
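
lorentz_fitness itself lies outside the hunks shown above; a hypothetical sketch of such a fitness function, assuming calc_state_vectors returns the simulated state matrix:

# Hypothetical sketch, not the committed implementation:
function lorentz_fitness_sketch(params, obsstates, times)
    simstates = calc_state_vectors(params, lorentz_equations, obsstates[:, 1], times)
    return sum(abs2, simstates .- obsstates)  # squared error against observed states
end
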
4 changes: 1 addition & 3 deletions examples/restarting_optimization.jl
@@ -2,9 +2,7 @@ using BlackBoxOptim

# Lets start and then restart an optimization of 2D rosenbrock
# as in the README:
-function rosenbrock2d(x)
-return (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
-end
+rosenbrock2d(x) = abs2(1.0 - x[1]) + 100.0 * abs2(x[2] - x[1]^2)

# If you want to restart optimization you need to get a handle
# to the optimizer, problem and params. Use the function
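
The truncated comment above refers to the bbsetup/bboptimize split; a minimal sketch of the restart pattern, with parameters assumed from the README example:

optctrl = bbsetup(rosenbrock2d; SearchRange = (-5.0, 5.0), NumDimensions = 2,
                  MaxSteps = 100, TraceMode = :silent)
res1 = bboptimize(optctrl)  # first run
res2 = bboptimize(optctrl)  # restart: continues from the controller's current state
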
8 changes: 4 additions & 4 deletions examples/rosenbrock_parallel.jl
@@ -8,21 +8,21 @@ using BlackBoxOptim
# define the function to optimize on all workers. Parallel eval only gives a gain
# if function to optimize is slow. For this example we introduce a fake sleep
# to make it slow since the function is actually very quick to eval...
-@everywhere function rosenbrock(x)
+@everywhere function slow_rosenbrock(x)
sleep(0.001) # Fake a slower func to be optimized...
-return( sum( 100*( x[2:end] - x[1:end-1].^2 ).^2 + ( x[1:end-1] - 1 ).^2 ) )
+return BlackBoxOptim.rosenbrock(x)
end

# First run without any parallel procs used in eval
-opt1 = bbsetup(rosenbrock; Method=:xnes, SearchRange = (-5.0, 5.0),
+opt1 = bbsetup(slow_rosenbrock; Method=:xnes, SearchRange = (-5.0, 5.0),
NumDimensions = 50, MaxFuncEvals = 5000)
tic()
res1 = bboptimize(opt1)
t1 = round(toq(), digits=3)

# When Workers= option is given, BlackBoxOptim enables parallel
# evaluation of fitness using the specified worker processes
-opt2 = bbsetup(rosenbrock; Method=:xnes, SearchRange = (-5.0, 5.0),
+opt2 = bbsetup(slow_rosenbrock; Method=:xnes, SearchRange = (-5.0, 5.0),
NumDimensions = 50, MaxFuncEvals = 5000, Workers = workers())
tic()
res2 = bboptimize(opt2)
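
For the Workers = workers() option above to do anything, worker processes must exist and have the package loaded; a minimal setup sketch (note the tic()/toq() calls in this file are themselves pre-0.7 API and would later become @elapsed):

using Distributed
addprocs(4)                      # spawn 4 worker processes
@everywhere using BlackBoxOptim  # load the package on every worker
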
6 changes: 2 additions & 4 deletions examples/save_and_load_optimization_state_to_disc.jl
@@ -3,13 +3,11 @@ using BlackBoxOptim
# Lets start an optimization of 2D rosenbrock
# as in the README, then save its state to disc,
# read it back in and continue optimization.
-function rosenbrock2d(x)
-return (1.0 - x[1])^2 + 100.0 * (x[2] - x[1]^2)^2
-end
+rosenbrock2d(x) = abs2(1.0 - x[1]) + 100.0 * abs2(x[2] - x[1]^2)

# If you want to restart optimization you need to get a handle
# to an optimization controller. Use the function bbsetup
-# instead of bboptimize, but with the same parameters.
+# instead of bboptimize, but with the same parameters.
# We just run it for 100 steps though:
optctrl = bbsetup(rosenbrock2d; SearchRange = (-5.0, 5.0), NumDimensions = 2,
MaxSteps = 100, TraceMode = :silent);
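
The actual save/load calls fall outside the hunk shown; one hypothetical way to persist the controller is Julia's built-in Serialization (the file name is illustrative):

using Serialization
res1 = bboptimize(optctrl)                            # run the first 100 steps
open(io -> serialize(io, optctrl), "bbopt.bin", "w")  # save optimizer state to disc
optctrl2 = open(deserialize, "bbopt.bin")             # read it back in
res2 = bboptimize(optctrl2)                           # ...and continue optimizing
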
2 changes: 1 addition & 1 deletion examples/visualize_crossover.jl
@@ -12,7 +12,7 @@ function plot_crossover(xover::CrossoverOperator{NP,NC}, parents::Matrix{Float64
@assert size(parents, 2) == NP
parents_pop = FitPopulation(parents, NaN)
parent_ixs = collect(1:NP)
-children_mtx = hcat([hcat(apply!(xover, [Individual(size(parents, 1)) for _ in 1:NC],
+children_mtx = hcat([hcat(apply!(xover, [fill!(Individual(undef, size(parents, 1)), NaN) for _ in 1:NC],
zeros(Int, NC), parents_pop,
shuffle_parents ? shuffle(parent_ixs) : parent_ixs)...) for _ in 1:n]...)
plot(layer(x=parents_pop.individuals[1,:], y=parents_pop.individuals[2,:], Geom.point, # parents
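
The Individual(undef, ...) change above is the Julia 0.7 explicit uninitialized-array constructor; assuming Individual is an alias for Vector{Float64}:

v = Individual(undef, 2)  # uninitialized storage; the 0.6 spelling was Individual(2)
fill!(v, NaN)             # the new code pre-fills with NaN before crossover writes into it
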
12 changes: 6 additions & 6 deletions spikes/adaptive_coordinate_descent.jl
@@ -148,7 +148,7 @@ function predicted_final_fitness(currentfevals, lastfevals, maxfevals, currentfi
end

function generate_random_orthogonal_transformation(P)
-B = eye(P,P)
+B = Matrix{Float64}(I, P,P)
for i = 1:P
v = randn(P,1)
while norm(v) < 1e-2
@@ -166,7 +166,7 @@ end
mutable struct NoEncoding
B
NoEncoding(mu, P) = begin
-new(eye(P, P))
+new(Matrix{Float64}(I, P, P))
end
end

@@ -213,8 +213,8 @@ mutable struct AdaptiveEncoding
zeros(P), # pc
zeros(P), # pcmu
population * weights, # xmean
-eye(P), # C
-eye(P), # Cold
+Matrix{Float64}(I, P, P), # C
+Matrix{Float64}(I, P, P), # Cold
ones(P), # diagD
0, # ps
0 # iter
@@ -286,12 +286,12 @@ function update!(ae::AdaptiveEncoding, population::Matrix{Float64}, howOftenUpda
if minimum(eigenvalues) <= 0
eigenvalues[eigenvalues .< 0] = 0
tmp = maximum(eigenvalues) / cond
-ae.C = ae.C .+ tmp * eye(ae.P)
+ae.C = ae.C .+ tmp * Matrix{Float64}(I, ae.P, ae.P)
eigenvalues += tmp * ones(ae.P,1)
end
if maximum(eigenvalues) > cond*minimum(eigenvalues)
tmp = maximum(eigenvalues)/cond - minimum(eigenvalues)
-ae.C = ae.C .+ tmp * eye(ae.P)
+ae.C = ae.C .+ tmp * Matrix{Float64}(I, ae.P, ae.P)
eigenvalues += tmp*ones(ae.P,1)
end
if (minimum(eigenvalues) <= 0)
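
The recurring eye(n) -> Matrix{Float64}(I, n, n) rewrite in this and the following files is the standard Julia 0.7 migration; I lives in LinearAlgebra:

using LinearAlgebra
B = Matrix{Float64}(I, 3, 3)  # dense 3x3 identity, the replacement for eye(3, 3)
A = rand(3, 3)
A + 0.1I                      # the lazy UniformScaling often avoids the dense identity entirely
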
32 changes: 15 additions & 17 deletions spikes/adaptive_coordinate_descent_w_adapted_coord_freqs.jl
@@ -3,11 +3,11 @@ include("../src/frequency_adaptation.jl")
# Combine Loschilov's ACD with Glasmachers ACF-CD. Initial testing indicated it is 3-20 times
# worse than just ACD. I guess the ACD needs new fitness evals in each direction of the transformation
# matrix in order to reliably update it. So maybe only use ACF when increasing the population size, i.e.
-# ensure that all directions have been tried at least once and then add additional once from the frequency
+# ensure that all directions have been tried at least once and then add additional once from the frequency
# selected directions?
function acf_adaptive_coordinate_descent(fitnessfct, P, Xmin, Xmax;
-MaxEval = 1e4 * P,
-stopfitness = 1e-8,
+MaxEval = 1e4 * P,
+stopfitness = 1e-8,
howOftenUpdateRotation = 1,
numfevals = 0,
kSuccess = 2.0,
@@ -124,8 +124,8 @@
println("Restart!")
# Recursive call to restart but express how many function evals we have already done...
xm2, bf2, nf2 = acf_adaptive_coordinate_descent(fitnessfct, P, Xmin, Xmax;
-MaxEval = MaxEval, stopfitness = stopfitness,
-howOftenUpdateRotation = howOftenUpdateRotation,
+MaxEval = MaxEval, stopfitness = stopfitness,
+howOftenUpdateRotation = howOftenUpdateRotation,
numfevals = numfevals)
if bf2 < bestFit
return (xm2, bf2, nf2)
@@ -156,7 +156,7 @@ function predicted_final_fitness(currentfevals, lastfevals, maxfevals, currentfi
end

function generate_random_orthogonal_transformation(P)
-B = eye(P,P)
+B = Matrix{Float64}(I, P, P)
for i = 1:P
v = randn(P,1)
while norm(v) < 1e-2
@@ -173,9 +173,7 @@ end
# Use this when no adaptive encoding is used, i.e. the ACD algorithm is a normal CD.
mutable struct NoEncoding
B
-NoEncoding(mu, P) = begin
-new(eye(P, P))
-end
+NoEncoding(mu, P) = new(Matrix{Float64}(I, P, P))
end

function update!(e::NoEncoding, population::Matrix{Float64}, howOftenUpdateRotation = 1)
@@ -215,8 +213,8 @@ mutable struct AdaptiveEncoding
zeros(P), # pc
zeros(P), # pcmu
population * weights, # xmean
-eye(P), # C
-eye(P), # Cold
+Matrix{Float64}(I, P, P), # C
+Matrix{Float64}(I, P, P), # Cold
ones(P), # diagD
0, # ps
0 # iter
@@ -247,13 +245,13 @@ function update!(ae::AdaptiveEncoding, population::Matrix{Float64}, howOftenUpda

# Line 10: Calculate z0 but check for the denominator being zero...
xdiff = ae.xmean .- xold
-denom = sum((ae.invB * xdiff).^2)
+denom = sum(abs2, ae.invB * xdiff)
if denom == 0
z0 = zeros(ae.P)
else
z0 = (sqrt(ae.P) / sqrt(denom)) * xdiff
end

# Line 11&13: Calc zi's and then Cmu.
#ae.cmu = 0.0
if ae.cmu != 0.0
@@ -265,7 +263,7 @@ function update!(ae::AdaptiveEncoding, population::Matrix{Float64}, howOftenUpda
Cmu = 1 # dummy just so it is defined
end

-# Line 12: Update p
+# Line 12: Update p
ae.pc = (1-ae.cp) * ae.pc .+ sqrt(ae.cp*(2-ae.cp)) * z0

# Line 14: Update C.
@@ -288,12 +286,12 @@ function update!(ae::AdaptiveEncoding, population::Matrix{Float64}, howOftenUpda
if minimum(eigenvalues) <= 0
eigenvalues[eigenvalues .< 0] = 0
tmp = maximum(eigenvalues) / cond
-ae.C = ae.C .+ tmp * eye(ae.P)
+ae.C = ae.C .+ tmp * Matrix{Float64}(I, ae.P, ae.P)
eigenvalues += tmp * ones(ae.P,1)
end
if maximum(eigenvalues) > cond*minimum(eigenvalues)
tmp = maximum(eigenvalues)/cond - minimum(eigenvalues)
-ae.C = ae.C .+ tmp * eye(ae.P)
+ae.C = ae.C .+ tmp * Matrix{Float64}(I, ae.P, ae.P)
eigenvalues += tmp*ones(ae.P,1)
end
if (minimum(eigenvalues) <= 0)
@@ -302,7 +300,7 @@ function update!(ae::AdaptiveEncoding, population::Matrix{Float64}, howOftenUpda
end

try
-ae.diagD = sqrt(eigenvalues)
+ae.diagD = sqrt(eigenvalues)
# Use Cholesky instead??
ae.B = ae.Bo * diagm(ae.diagD) # This is sometimes a matrix rather than a vector. Strange!
ae.invB = diagm(1 ./ ae.diagD) * ae.Bo'
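
The sum(abs2, ae.invB * xdiff) rewrite above is both a 0.7-era idiom and an allocation saving; a quick equivalence check:

xdiff = randn(5)
sum(xdiff .^ 2) ≈ sum(abs2, xdiff)  # true; the right-hand form skips the temporary array
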
6 changes: 3 additions & 3 deletions spikes/adaptive_encoding.jl
@@ -18,8 +18,8 @@ mutable struct AdaptiveEncoding <: CoordinateTransform
c_p = 1.0 / sqrt(n)
c_mu = c_1 = 0.5 / n
p = zeros(n, 1)
-C = eye(n, n)
-B = eye(n, n)
+C = Matrix{Float64}(I, n, n)
+B = Matrix{Float64}(I, n, n)
new(n, sqrt(n), false, c_p, c_mu, c_1,
p, C, B, zeros(n, 1), zeros(n, 1))
end
@@ -63,7 +63,7 @@ function update_transform!(ae::AdaptiveEncoding, x::Array{Float64, 2}, us::Array

# Should we enforce symmetry in C?
# tc = triu(ae.C)
-# ae.C = tc + tc' - eye(ae.C) * diag(ae.C)
+# ae.C = tc + tc' - Matrix{Float64}(I, ae.C, ae.C) * diag(ae.C)

# Now B is the square root of the covar matrix, but faster to do eig decomposition.
eigvalues, eigvectors = eig(ae.C)
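
Note that the eig(ae.C) context line above is itself pre-0.7 API; the follow-up migration would be eigen from LinearAlgebra:

using LinearAlgebra
F = eigen([2.0 1.0; 1.0 2.0])
eigvalues, eigvectors = F.values, F.vectors  # eig(C) returned these as a tuple in 0.6
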
4 changes: 2 additions & 2 deletions spikes/cec14_exp1_tune_one_run_cmsa_es/cec_cmsa_es.jl
@@ -178,7 +178,7 @@ mutable struct EigenCovarSampler <: CovarianceMatrixSampler
diagD::Array{Float64,1}

EigenCovarSampler(n) = begin
-new(eye(n,n), eye(n,n), ones(n))
+new(Matrix{Float64}(I, n,n), Matrix{Float64}(I, n,n), ones(n))
end
end

@@ -201,7 +201,7 @@ mutable struct CholeskyCovarSampler <: CovarianceMatrixSampler
sqrtC::Array{Float64,2}

CholeskyCovarSampler(n) = begin
-new(eye(n,n), eye(n,n))
+new(Matrix{Float64}(I, n,n), Matrix{Float64}(I, n,n))
end
end

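
As background on why these samplers store a matrix square root: to draw x ~ N(0, C) you multiply standard normals by any square root of C. A small sketch:

using LinearAlgebra
C = [4.0 1.0; 1.0 2.0]
L = cholesky(C).L  # lower-triangular factor with C == L*L'
x = L * randn(2)   # one draw with covariance C
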
6 changes: 5 additions & 1 deletion spikes/comparing_covariate_selection_schemes.jl
@@ -4,12 +4,16 @@
#
function rosenbrock(x)
n = length(x)
-return( sum( 100*( x[2:n] - x[1:(n-1)].^2 ).^2 + ( x[1:(n-1)] - 1 ).^2 ) )
+return sum( 100*( x[2:n] - x[1:(n-1)].^2 ).^2 + ( x[1:(n-1)] - 1 ).^2 )
end
+# FIXME this should be a more efficient implementation, but switching to it would reset all benchmarks
+#rosenbrock(x) = sum(i -> 100*abs2(x[i+1] - x[i]^2) + abs2(x[i] - 1), Base.OneTo(length(x)-1))

function sphere(x)
sum(x.^2)
end
+# FIXME this should be a more efficient implementation, but switching to it would reset all benchmarks
+#sphere(x) = sum(abs2, x)

mutable struct SparseShiftedProblem
active_factors::Array{Int, 1}
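
A quick check that the FIXME one-liners above match the current definitions (names with the _fast suffix are illustrative only):

rosenbrock_fast(x) = sum(i -> 100*abs2(x[i+1] - x[i]^2) + abs2(x[i] - 1), Base.OneTo(length(x)-1))
sphere_fast(x) = sum(abs2, x)
x = randn(6)
rosenbrock(x) ≈ rosenbrock_fast(x)  # true
sphere(x) ≈ sphere_fast(x)          # true
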
2 changes: 1 addition & 1 deletion spikes/compass_search.jl
@@ -12,7 +12,7 @@ function compass_search(f, n; max_fevals = 1e6, delta_tol = 1e-20,
x = randn(n, 1)
end

-directions = [eye(n,n) -eye(n, n)]
+directions = [Matrix{Float64}(I, n,n) -Matrix{Float64}(I, n, n)]

num_fevals = 0
fc = fbest = Inf
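
The directions matrix built above is just the +/- coordinate unit vectors; for n = 2:

using LinearAlgebra
n = 2
directions = [Matrix{Float64}(I, n, n) -Matrix{Float64}(I, n, n)]
# 2x4 matrix with columns e1, e2, -e1, -e2: the 2n compass directions
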
2 changes: 1 addition & 1 deletion spikes/covar_matrix_adaptation_es.jl
@@ -154,7 +154,7 @@ end
############################################################################
# initialization:
############################################################################
-#Cov = eye(n); # initial covariance matrix
+#Cov = Matrix{Float64}(I, n, n); # initial covariance matrix
# initializing individual population:
#Individual.y = yInit;
#Individual.w = 0;
