Fully debugged L-BFGS implementation.

Also solidified APIs across different optimization packages.
commit 9af2449b824158b83e6b9fa7d6ca8d0b6b1ea1b9 (1 parent: 0e0317d)
Clement Farabet (clementfarabet) authored
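The practical effect of the API consolidation is a common return convention across the routines touched below: each optimizer now returns the optimized vector x* first and a table of function values second. A minimal usage sketch, assuming the rosenbrock test function loaded by the test scripts and illustrative parameter values:

-- Hypothetical sketch of the unified return convention (not part of the diff).
x = torch.Tensor(2):fill(0)
x, fs = optim.cg(rosenbrock, x, {maxIter = 50})
print('f before optimization:', fs[1])
print('f after optimization :', fs[#fs])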
4 asgd.lua
@@ -64,6 +64,6 @@ function optim.asgd(opfunc, x, state)
state.eta_t = state.eta0 / math.pow((1 + state.lambda * state.eta0 * state.t), state.alpha)
state.mu_t = 1 / math.max(1, state.t - state.t0)
- -- return f(x_old), x_new, and averaged x
- return x,fx,state.ax
+ -- return x*, f(x) before optimization, and average(x_t0,x_t1,x_t2,...)
+ return x,{fx},state.ax
end
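For asgd the table holds a single entry per call, and a third value, the averaged iterate, is returned as well. A short sketch of a caller consuming all three values, again assuming the rosenbrock test function and illustrative eta0/t0 settings:

-- Hypothetical sketch (not part of the diff): reading asgd's three return values.
x = torch.Tensor(2):fill(0)
state = {eta0 = 1e-3, t0 = 1000}
for i = 1, 10000 do
   x, f, ax = optim.asgd(rosenbrock, x, state)
end
-- f[1] is f(x) before the last step; ax is the average of the iterates x_t0, x_t1, ...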
22 cg.lua
@@ -12,10 +12,19 @@
-- opfunc : a function that takes a single input, the point of evaluation.
-- x : the initial point
-- params : a table of parameters and temporary allocations.
--- params.length : max number of function evaluations
+-- params.maxEval : max number of function evaluations
+-- params.maxIter : max number of iterations
-- params.df[0,1,2,3] : if you pass torch.Tensor they will be used for temp storage
-- params.[s,x0] : if you pass torch.Tensor they will be used for temp storage
--
+-- RETURN:
+-- x* : the new x vector, at the optimal point
+-- f : a table of all function values:
+-- f[1] is the value of the function before any optimization
+-- f[#f] is the final fully optimized value, at x*
+--
+-- (Koray Kavukcuoglu, 2012)
+--
function optim.cg(opfunc, x, params)
-- parameters
local params = params or {}
@@ -23,9 +32,9 @@ function optim.cg(opfunc, x, params)
local sig = params.sig or 0.5
local int = params.int or 0.1
local ext = params.ext or 3.0
- local max = params.max or 20
+ local maxIter = params.maxIter or 20
local ratio = params.ratio or 100
- local length = params.length or 25
+ local maxEval = params.maxEval or maxIter*1.25
local red = 1
local verbose = params.verbose or 0
@@ -61,6 +70,7 @@ function optim.cg(opfunc, x, params)
-- evaluate at initial point
f1,tdf = opfunc(x)
+ fx[#fx+1] = f1
df1:copy(tdf)
i=i+1
@@ -70,7 +80,7 @@ function optim.cg(opfunc, x, params)
d1 = -s:dot(s ) -- slope
z1 = red/(1-d1) -- initial step
- while i < math.abs(length) do
+ while i < math.abs(maxEval) do
x0:copy(x)
f0 = f1
@@ -82,7 +92,7 @@ function optim.cg(opfunc, x, params)
i=i+1
d2 = df2:dot(s)
f3,d3,z3 = f1,d1,-z1 -- init point 3 equal to point 1
- local m = math.min(max,length-i)
+ local m = math.min(maxIter,maxEval-i)
local success = 0
local limit = -1
@@ -169,7 +179,7 @@ function optim.cg(opfunc, x, params)
x:copy(x0)
f1 = f0
df1:copy(df0)
- if ls_failed or i>length then
+ if ls_failed or i>maxEval then
break
end
local tmp = df1:clone()
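With the rename, cg separates the iteration budget (params.maxIter) from the evaluation budget (params.maxEval, which defaults to maxIter*1.25). A hedged calling sketch, with illustrative values:

-- Hypothetical sketch (not part of the diff): the renamed cg parameters in use.
x = torch.Tensor(2):fill(0)
x, fs = optim.cg(rosenbrock, x, {maxIter = 50, maxEval = 80})
-- omitting maxEval defaults it to maxIter*1.25; fs[1] and fs[#fs] bracket the optimization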
60 lbfgs.lua
@@ -1,9 +1,16 @@
----------------------------------------------------------------------
-- An implementation of L-BFGS, heavily inspired from minFunc.
--
--- For now, we only implement one type of line search:
--- Bracketing w/ Cubic Interpolation/Extrapolation
--- with function + gradient values (Wolfe Criterion)
+-- This implementation of L-BFGS relies on a user-provided line
+-- search function (state.lineSearch). If this function is not
+-- provided, then a simple learningRate is used to produce fixed
+-- size steps. Fixed size steps are much less costly than line
+-- searches, and can be useful for stochastic problems.
+--
+-- The learning rate is used even when a line search is provided.
+-- This is also useful for large-scale stochastic problems, where
+-- opfunc is a noisy approximation of f(x). In that case, the learning
+-- rate allows a reduction of confidence in the step size.
--
-- ARGS:
-- opfunc : a function that takes a single input (X), the point of
@@ -11,20 +18,26 @@
-- x : the initial point
-- state : a table describing the state of the optimizer; after each
-- call the state is modified
--- state.maxIter : Maximum number of iterations allowed
--- state.maxEval : Maximum number of function evaluations
--- state.tolFun : Termination tolerance on the first-order optimality
--- state.tolX : Termination tol on progress in terms of func/param changes
+-- state.maxIter : Maximum number of iterations allowed
+-- state.maxEval : Maximum number of function evaluations
+-- state.tolFun : Termination tolerance on the first-order optimality
+-- state.tolX : Termination tol on progress in terms of func/param changes
+-- state.lineSearch : A line search function
+-- state.learningRate : If no line search provided, then a fixed step size is used
--
-- RETURN:
--- x : the new x vector
--- f(x) : the function value, at the optimal point
+-- x* : the new x vector, at the optimal point
+-- f : a table of all function values:
+-- f[1] is the value of the function before any optimization
+-- f[#f] is the final fully optimized value, at x*
+--
+-- (Clement Farabet, 2012)
--
function optim.lbfgs(opfunc, x, state)
-- get/update state
local state = state or {}
local maxIter = tonumber(state.maxIter) or 20
- local maxEval = tonumber(state.maxEval) or 40
+ local maxEval = tonumber(state.maxEval) or maxIter*1.25
local tolFun = state.tolFun or 1e-5
local tolX = state.tolX or 1e-9
local nCorrection = state.nCorrection or 100
@@ -47,9 +60,6 @@ function optim.lbfgs(opfunc, x, state)
local abs = math.abs
local min = math.min
- -- initial step length
- local t = 1
-
-- evaluate initial f(x) and df/dx
local f,g = opfunc(x)
local f_hist = {f}
@@ -57,7 +67,7 @@ function optim.lbfgs(opfunc, x, state)
state.funcEval = state.funcEval + 1
-- check optimality of initial point
- if g:abs():sum() <= tolFun then
+ if g:clone():abs():sum() <= tolFun then
-- optimality condition below tolFun
verbose('optimality condition below tolFun')
return x,f
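The added clone() matters because Tensor.abs() operates in place in torch; without it, the optimality check would strip the signs from the stored gradient. A one-line illustration, assuming standard torch Tensor semantics:

-- abs() mutates its tensor; cloning first leaves g intact for the later update.
local l1 = g:clone():abs():sum()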
@@ -65,7 +75,7 @@ function optim.lbfgs(opfunc, x, state)
-- optimize for a max of maxIter iterations
local nIter = 0
- local d,old_dirs,old_stps,Hdiag,g_old,f_old
+ local d,old_dirs,old_stps,Hdiag,g_old,f_old,t
while nIter < maxIter do
-- keep track of nb of iterations
nIter = nIter + 1
@@ -75,8 +85,8 @@ function optim.lbfgs(opfunc, x, state)
------------------------------------------------------------
if nIter == 1 then
d = -g
- old_dirs = {zeros(g:size())}
- old_stps = {zeros(d:size())}
+ old_dirs = {}
+ old_stps = {}
Hdiag = 1
else
-- do lbfgs update (update memory)
@@ -154,7 +164,7 @@ function optim.lbfgs(opfunc, x, state)
-- reset initial guess for step size
if nIter == 1 then
- t = min(1,1/g:abs():sum())
+ t = min(1,1/g:clone():abs():sum()) * learningRate
else
t = learningRate
end
@@ -162,16 +172,14 @@ function optim.lbfgs(opfunc, x, state)
-- optional line search: user function
local lsFuncEval = 0
if lineSearch and type(lineSearch) == 'function' then
- -- perform line search, satisfying Wolfe condition
+ -- perform line search, using user function
f,g,x,t,lsFuncEval = lineSearch(opfunc,x,t,d,f,g,gtd,c1,c2,tolX)
append(f_hist, f)
-
- -- from minFunc:
- --[t,f,g,lsFuncEval] = WolfeLineSearch(x,t,d,f,g,gtd,c1,c2,LS=4,25,tolX,false,false,1,opfunc)
else
- -- no line search, simply re-evaluate (costly & stupid but needed by check below)
- x:add(d*t)
+ -- no line search, simply move with fixed-step and re-evaluate f(x)
+ x:add(t,d)
f,g = opfunc(x)
+ lsFuncEval = 1
append(f_hist, f)
end
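The fixed-step branch now uses x:add(t, d), torch's scalar-times-tensor form, which performs x = x + t*d in place without building the temporary d*t. When a line search is supplied instead, it must follow the signature used above, f,g,x,t,lsFuncEval = lineSearch(opfunc,x,t,d,f,g,gtd,c1,c2,tolX). Below is a minimal backtracking sketch conforming to that signature (an illustration only, not part of this commit; it applies an Armijo-style decrease test and ignores c2):

-- Hypothetical conforming line search: halve the step until a sufficient decrease holds.
local function backtrackingLineSearch(opfunc, x, t, d, f, g, gtd, c1, c2, tolX)
   local lsFuncEval = 0
   local fNew, gNew
   local xNew = x:clone()
   while true do
      xNew:copy(x):add(t, d)               -- candidate point x + t*d
      fNew, gNew = opfunc(xNew)
      lsFuncEval = lsFuncEval + 1
      if fNew <= f + c1*t*gtd or t < tolX then break end
      t = t/2                              -- shrink the step and retry
   end
   x:copy(xNew)                            -- commit the accepted step in place
   return fNew, gNew, x, t, lsFuncEval
end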
@@ -182,7 +190,7 @@ function optim.lbfgs(opfunc, x, state)
------------------------------------------------------------
-- check conditions
------------------------------------------------------------
- if g:abs():sum() <= tolFun then
+ if g:clone():abs():sum() <= tolFun then
-- check optimality
verbose('optimality condition below tolFun')
break
@@ -208,5 +216,5 @@ function optim.lbfgs(opfunc, x, state)
end
-- return optimal x, and history of f(x)
- return x,f_hist
+ return x,f_hist,currentFuncEval
end
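Putting it together, lbfgs can be driven either with a fixed learningRate (as in test/test_lbfgs.lua below) or with a line search supplied through state; in both cases it now returns three values. A hedged sketch using the backtracking function sketched above:

-- Hypothetical sketch (not part of the diff): lbfgs with a user-supplied line search.
x = torch.Tensor(2):fill(0)
state = {maxIter = 100, lineSearch = backtrackingLineSearch, learningRate = 1}
x, fs, nEval = optim.lbfgs(rosenbrock, x, state)
print('f before:', fs[1], '  f after:', fs[#fs], '  function evals:', nEval)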
4 sgd.lua
@@ -63,6 +63,6 @@ function optim.sgd(opfunc, x, state)
-- (5) update evaluation counter
state.evalCounter = state.evalCounter + 1
- -- return f(x_old), and x_new
- return x,fx
+ -- return x*, f(x) before optimization
+ return x,{fx}
end
2 test/test_cg.lua
@@ -6,7 +6,7 @@ dofile('l2.lua')
x = torch.Tensor(2):fill(0)
-x,fx,i=optim.cg(rosenbrock,x,{length=500})
+x,fx,i=optim.cg(rosenbrock,x,{maxIter=50})
print()
print('Rosenbrock test: compare with http://www.gatsby.ucl.ac.uk/~edward/code/minimize/example.html')
16 test/test_lbfgs.lua
@@ -0,0 +1,16 @@
+require 'lab'
+require 'optim'
+require 'plot'
+dofile 'rosenbrock.lua'
+dofile 'l2.lua'
+
+x = torch.Tensor(2):fill(0)
+x,fx,i=optim.lbfgs(rosenbrock,x,{maxIter=100, verbose=true, learningRate=1e-1})
+
+print()
+print('Rosenbrock test')
+print()
+print('Number of function evals = ',i)
+print('x=');print(x)
+print('fx=')
+for i=1,#fx do print(i,fx[i]); end
23 test/test_sgd.lua
@@ -0,0 +1,23 @@
+require 'lab'
+require 'optim'
+require 'plot'
+dofile 'rosenbrock.lua'
+dofile 'l2.lua'
+
+x = torch.Tensor(2):fill(0)
+fx = {}
+
+config = {eta0=1e-3, t0=1000}
+for i = 1,10001 do
+ x,f=optim.sgd(rosenbrock,x,config)
+ if (i-1)%1000 == 0 then
+ table.insert(fx,f[1])
+ end
+end
+
+print()
+print('Rosenbrock test')
+print()
+print('x=');print(x)
+print('fx=')
+for i=1,#fx do print((i-1)*1000+1,fx[i]); end