diff --git a/src/AbstractOperators.jl b/src/AbstractOperators.jl index 24fdfb0..64e170c 100644 --- a/src/AbstractOperators.jl +++ b/src/AbstractOperators.jl @@ -2,24 +2,24 @@ __precompile__() module AbstractOperators -const RealOrComplex{T<:Real} = Union{T, Complex{T}} - abstract type AbstractOperator end abstract type LinearOperator <: AbstractOperator end abstract type NonLinearOperator <: AbstractOperator end -import Base: A_mul_B!, Ac_mul_B! +import Base: A_mul_B!, Ac_mul_B! export LinearOperator, NonLinearOperator, AbstractOperator -# deep stuff +# Block stuff + +include("utilities/block.jl") +#include("utilities/deep.jl") # TODO: remove this eventually -include("utilities/deep.jl") +# Predicates and properties -# predicates and properties include("properties.jl") # Linear operators @@ -43,7 +43,7 @@ include("linearoperators/Filt.jl") include("linearoperators/MIMOFilt.jl") include("linearoperators/Xcorr.jl") include("linearoperators/LBFGS.jl") -include("linearoperators/BlkDiagLBFGS.jl") +# include("linearoperators/BlkDiagLBFGS.jl") # Calculus rules @@ -67,7 +67,4 @@ include("nonlinearoperators/SoftPlus.jl") # Syntax include("syntax.jl") - - - end diff --git a/src/linearoperators/BlkDiagLBFGS.jl b/src/linearoperators/BlkDiagLBFGS.jl deleted file mode 100644 index c332a73..0000000 --- a/src/linearoperators/BlkDiagLBFGS.jl +++ /dev/null @@ -1,128 +0,0 @@ - -mutable struct BlkDiagLBFGS{M, N, R<:Real, A <:NTuple{N,AbstractArray}, B <:NTuple{M,A}} <: LinearOperator - currmem::Int - curridx::Int - s::A - y::A - s_m::B - y_m::B - ys_m::Array{Float64,1} - alphas::Array{Float64,1} - H::R -end - -#constructors -#default -function LBFGS(T::NTuple{N,Type}, dim::NTuple{N,NTuple}, M::Int) where {N} - s_m = tuple([deepzeros(T,dim) for i = 1:M]...) - y_m = tuple([deepzeros(T,dim) for i = 1:M]...) - s = deepzeros(T,dim) - y = deepzeros(T,dim) - R = real(T[1]) - ys_m = zeros(R, M) - alphas = zeros(R, M) - BlkDiagLBFGS{M,N,R,typeof(s),typeof(s_m)}(0, 0, s, y, s_m, y_m, ys_m, alphas, 1.) -end - -LBFGS(x::NTuple{N,AbstractArray},M::Int64) where {N} = LBFGS(eltype.(x),size.(x),M) - -#mappings - -@generated function update!(L::BlkDiagLBFGS{M,N,R,A,B}, - x::A, - x_prev::A, - gradx::A, - gradx_prev::A) where {M,N,R,A,B} - - ex = :(ys = 0.) - for i = 1:N - ex = :($ex; ys += update_s_y(L.s[$i],L.y[$i],x[$i],x_prev[$i],gradx[$i],gradx_prev[$i])) - end - ex2 = :(yty = 0.) - for i = 1:N - ex2 = :($ex2; yty += update_s_m_y_m(L.s_m[L.curridx][$i],L.y_m[L.curridx][$i], - L.s[$i],L.y[$i] )) - end - - ex = quote - $ex - if ys > 0 - L.curridx += 1 - if L.curridx > M L.curridx = 1 end - L.currmem += 1 - if L.currmem > M L.currmem = M end - - $ex2 - L.ys_m[L.curridx] = ys - L.H = ys/sum(yty) - end - return L - end -end - -function update_s_y(s::A, y::A, x::A, x_prev::A, gradx::A, gradx_prev::A) where {A} - s .= (-).(x, x_prev) - y .= (-).(gradx, gradx_prev) - ys = real(vecdot(s,y)) - return ys -end - -function update_s_m_y_m(s_m::A,y_m::A,s::A,y::A) where {A} - s_m .= s - y_m .= y - - yty = real(vecdot(y,y)) - return yty -end - -function A_mul_B!(d::A, L::BlkDiagLBFGS{M,N,R,A,B}, gradx::A) where {M, N, R, A, B} - for i = 1:N - d[i] .= (-).(gradx[i]) - end - idx = loop1!(d,L) - for i = 1:N - d[i] .= (*).(L.H, d[i]) - end - d = loop2!(d,idx,L) -end - -function loop1!(d::A, L::BlkDiagLBFGS{M,N,R,A,B}) where {M, N, R, A, B} - idx = L.curridx - for i=1:L.currmem - L.alphas[idx] = sum(real.(vecdot.(L.s_m[idx], d))) - - L.alphas[idx] /= L.ys_m[idx] - for ii = 1:N - d[ii] .-= L.alphas[idx].*L.y_m[idx][ii] - end - idx -= 1 - if idx == 0 idx = M end - end - return idx -end - -function loop2!(d::A, idx::Int, L::BlkDiagLBFGS{M,N,R,A,B}) where {M, N, R, A, B} - for i=1:L.currmem - idx += 1 - if idx > M idx = 1 end - beta = sum(real.(vecdot.(L.y_m[idx], d))) - beta /= L.ys_m[idx] - for ii = 1:N - d[ii] .-= (beta-L.alphas[idx]).*L.s_m[idx][ii] - end - end - return d -end - -function reset(L::BlkDiagLBFGS) - L.currmem = 0 - L.curridx = 0 -end - -# Properties - -domainType(L::BlkDiagLBFGS) = eltype.(L.s) -codomainType(L::BlkDiagLBFGS) = eltype.(L.s) - -fun_name(A::BlkDiagLBFGS) = "LBFGS" -size(A::BlkDiagLBFGS) = (size.(A.s), size.(A.s)) diff --git a/src/linearoperators/LBFGS.jl b/src/linearoperators/LBFGS.jl index f1459b8..710044b 100644 --- a/src/linearoperators/LBFGS.jl +++ b/src/linearoperators/LBFGS.jl @@ -1,14 +1,11 @@ export LBFGS, update! -# TODO make Ac_mul_B! -# Edit: Ac_mul_B! is not really needed for this operator -# Edit2: you never known! anyway for completeness would be cool to have it! """ -`LBFGS(T::Type, dim::Tuple, Memory::Int)` +`LBFGS(domainType::Type,dim_in::Tuple, M::Integer)` -`LBFGS{N}(T::NTuple{N,Type}, dim::NTuple{N,Tuple}, M::Int)` +`LBFGS(dim_in::Tuple, M::Integer)` -`LBFGS(x::AbstractArray, Memory::Int)` +`LBFGS(x::AbstractArray, M::Integer)` Construct a Limited-Memory BFGS `LinearOperator` with memory `M`. The memory of `LBFGS` can be updated using the function `update!`, where the current iteration variable and gradient (`x`, `grad`) and the previous ones (`x_prev` and `grad_prev`) are needed: @@ -16,117 +13,113 @@ Construct a Limited-Memory BFGS `LinearOperator` with memory `M`. The memory of julia> L = LBFGS(Float64,(4,),5) LBFGS ℝ^4 -> ℝ^4 -julia> update!(L,x,x_prev,grad,grad_prev); #update memory +julia> update!(L,x,x_prev,grad,grad_prev); # update memory -julia> d = L*x; #compute new direction +julia> d = L*grad; # compute new direction ``` - """ -mutable struct LBFGS{M, N, R <: Real, T <: Union{R, Complex{R}}, A<:AbstractArray{T,N}} <: LinearOperator - currmem::Int - curridx::Int - s::A - y::A - s_m::NTuple{M, A} - y_m::NTuple{M, A} - ys_m::Array{R, 1} +mutable struct LBFGS{R, T <: BlockArray, M, I <: Integer} <: LinearOperator + currmem::I + curridx::I + s::T + y::T + s_M::Array{T, 1} + y_M::Array{T, 1} + ys_M::Array{R, 1} alphas::Array{R, 1} H::R end -# Constructors -#default -function LBFGS(T::Type, dim::NTuple{N,Int}, M::Int) where {N} - s_m = tuple([deepzeros(T,dim) for i = 1:M]...) - y_m = tuple([deepzeros(T,dim) for i = 1:M]...) - s = deepzeros(T,dim) - y = deepzeros(T,dim) - R = real(T) - ys_m = zeros(R, M) +#default constructor + +function LBFGS(domainType, dim_in, M::I) where {I <: Integer} + s_M = [blockzeros(domainType, dim_in) for i = 1:M] + y_M = [blockzeros(domainType, dim_in) for i = 1:M] + s = blockzeros(domainType, dim_in) + y = blockzeros(domainType, dim_in) + T = typeof(s) + R = typeof(domainType) <: Tuple ? real(domainType[1]) : real(domainType) + ys_M = zeros(R, M) alphas = zeros(R, M) - LBFGS{M,N,R,T,typeof(s)}(0, 0, s, y, s_m, y_m, ys_m, alphas, one(R)) + LBFGS{R, T, M, I}(0, 0, s, y, s_M, y_M, ys_M, alphas, one(R)) end -LBFGS(x::AbstractArray,M::Int) = LBFGS(eltype(x),size(x),M) +function LBFGS(dim_in, M::I) where {I <: Integer} + domainType = eltype(dim_in) <: Integer ? Float64 : ([Float64 for i in eachindex(dim_in)]...) + LBFGS(domainType, dim_in, M) +end + +function LBFGS(x::T, M::I) where {T <: BlockArray, I <: Integer} + domainType = blockeltype(x) + dim_in = blocksize(x) + LBFGS(domainType, dim_in, M) +end """ `update!(L::LBFGS, x, x_prex, grad, grad_prev)` -See `LBFGS` documentation. - +See the documentation for `LBFGS`. """ -function update!(L::LBFGS{M,N,R,T,A}, - x::A, - x_prev::A, - gradx::A, - gradx_prev::A) where {M,N,R,T,A} - - ys = update_s_y(L,x,x_prev,gradx,gradx_prev) - +function update!(L::LBFGS{R, T, M, I}, x::T, x_prev::T, gradx::T, gradx_prev::T) where {R, T, M, I} + L.s .= x .- x_prev + L.y .= gradx .- gradx_prev + ys = real(blockvecdot(L.s, L.y)) if ys > 0 L.curridx += 1 if L.curridx > M L.curridx = 1 end L.currmem += 1 if L.currmem > M L.currmem = M end - - - yty = update_s_m_y_m(L,L.curridx) - L.ys_m[L.curridx] = ys + L.ys_M[L.curridx] = ys + blockcopy!(L.s_M[L.curridx], L.s) + blockcopy!(L.y_M[L.curridx], L.y) + yty = real(blockvecdot(L.y, L.y)) L.H = ys/yty end return L end -function update_s_y(L::LBFGS{M,N,R,T,A}, x::A, x_prev::A, gradx::A, gradx_prev::A) where {M,N,R,T,A} - L.s .= (-).(x, x_prev) - L.y .= (-).(gradx, gradx_prev) - ys = real(vecdot(L.s,L.y)) - return ys -end +# LBFGS operators are symmetric -function update_s_m_y_m(L::LBFGS{M,N,R,T,A}, curridx::Int) where {M,N,R,T,A} - L.s_m[curridx] .= L.s - L.y_m[curridx] .= L.y +Ac_mul_B!(x::T, L::LBFGS{R, T, M, I}, y::T) where {R, T, M, I} = A_mul_B!(x, L, y) - yty = real(vecdot(L.y,L.y)) - return yty -end +# Two-loop recursion -function A_mul_B!(d::A, L::LBFGS{M,N,R,T,A}, gradx::A) where {M,N,R,T,A} - d .= (-).(gradx) +function A_mul_B!(d::T, L::LBFGS{R, T, M, I}, gradx::T) where {R, T, M, I} + d .= gradx idx = loop1!(d,L) d .= (*).(L.H, d) d = loop2!(d,idx,L) end -function loop1!(d::A, L::LBFGS{M,N,R,T,A}) where {M,N,R,T,A} +function loop1!(d::T, L::LBFGS{R, T, M, I}) where {R, T, M, I} idx = L.curridx - for i=1:L.currmem - L.alphas[idx] = real(vecdot(L.s_m[idx], d))/L.ys_m[idx] - d .-= L.alphas[idx].*L.y_m[idx] + for i = 1:L.currmem + L.alphas[idx] = real(blockvecdot(L.s_M[idx], d))/L.ys_M[idx] + d .-= L.alphas[idx] .* L.y_M[idx] idx -= 1 if idx == 0 idx = M end end return idx end -function loop2!(d::A, idx::Int, L::LBFGS{M,N,R,T,A}) where {M,N,R,T,A} - for i=1:L.currmem +function loop2!(d::T, idx::Int, L::LBFGS{R, T, M, I}) where {R, T, M, I} + for i = 1:L.currmem idx += 1 if idx > M idx = 1 end - beta = real(vecdot(L.y_m[idx], d))/L.ys_m[idx] - d .+= (L.alphas[idx].-beta).*L.s_m[idx] + beta = real(blockvecdot(L.y_M[idx], d))/L.ys_M[idx] + d .+= (L.alphas[idx] - beta) .* L.s_M[idx] end return d end # Properties - domainType(L::LBFGS{M,N,R,T,A}) where {M,N,R,T,A} = T -codomainType(L::LBFGS{M,N,R,T,A}) where {M,N,R,T,A} = T -size(A::LBFGS) = (size(A.s), size(A.s)) +domainType(L::LBFGS{R, T, M, I}) where {R, T, M, I} = blockeltype(L.y_M[1]) +codomainType(L::LBFGS{R, T, M, I}) where {R, T, M, I} = blockeltype(L.y_M[1]) + +size(A::LBFGS) = (blocksize(A.s), blocksize(A.s)) fun_name(A::LBFGS) = "LBFGS" diff --git a/src/syntax.jl b/src/syntax.jl index 47f8bbe..305164f 100644 --- a/src/syntax.jl +++ b/src/syntax.jl @@ -1,4 +1,4 @@ -import Base: blkdiag, transpose, *, +, -, getindex, hcat, vcat, reshape +import Base: blkdiag, transpose, *, +, -, getindex, hcat, vcat, reshape export jacobian ###### blkdiag ###### @@ -14,8 +14,8 @@ transpose{T <: AbstractOperator}(L::T) = Transpose(L) (-)(L1::AbstractOperator, L2::AbstractOperator) = Sum(L1, -L2 ) ###### * ###### -function (*){T <: Union{AbstractArray, Tuple}}(L::AbstractOperator, b::T) - y = deepzeros(codomainType(L), size(L, 1)) +function (*){T <: BlockArray}(L::AbstractOperator, b::T) + y = blockzeros(codomainType(L), size(L, 1)) A_mul_B!(y, L, b) return y end @@ -25,24 +25,24 @@ end # redefine .* Base.broadcast(::typeof(*), d::AbstractArray, L::AbstractOperator) = DiagOp(codomainType(L), size(d), d)*L -Base.broadcast(::typeof(*), d::AbstractArray, L::Scale) = DiagOp(L.coeff*d)*L.A +Base.broadcast(::typeof(*), d::AbstractArray, L::Scale) = DiagOp(L.coeff*d)*L.A # getindex -function getindex(A::AbstractOperator,idx...) +function getindex(A::AbstractOperator,idx...) if ndoms(A,2) == 1 Gout = GetIndex(codomainType(A),size(A,1),idx) return Gout*A - elseif length(idx) == 1 && ndoms(A,2) == length(idx[1]) + elseif length(idx) == 1 && ndoms(A,2) == length(idx[1]) return permute(A,idx[1]) else - error("cannot split operator of type $(typeof(H.A[i]))") + error("cannot split operator of type $(typeof(H.A[i]))") end end #get index of HCAT returns HCAT (or Operator) function getindex{M,N,L,P,C,A<:HCAT{M,N,L,P,C}}(H::A, idx::Union{AbstractArray,Int}) - unfolded = vcat([[i... ] for i in H.idxs]...) + unfolded = vcat([[i... ] for i in H.idxs]...) if length(idx) == length(unfolded) return permute(H,idx) else @@ -51,9 +51,9 @@ function getindex{M,N,L,P,C,A<:HCAT{M,N,L,P,C}}(H::A, idx::Union{AbstractArray,I for ii in eachindex(H.idxs) if i in H.idxs[ii] if typeof(H.idxs[ii]) <: Int - new_H = (new_H...,H.A[ii]) + new_H = (new_H...,H.A[ii]) else - error("cannot split operator") + error("cannot split operator") end end end @@ -66,7 +66,7 @@ end #get index of HCAT returns HCAT (or Operator) function getindex{M,N,L,P,C,A<:VCAT{M,N,L,P,C}}(H::A, idx::Union{AbstractArray,Int}) - unfolded = vcat([[i... ] for i in H.idxs]...) + unfolded = vcat([[i... ] for i in H.idxs]...) if length(idx) == length(unfolded) return permute(H,idx) else @@ -75,9 +75,9 @@ function getindex{M,N,L,P,C,A<:VCAT{M,N,L,P,C}}(H::A, idx::Union{AbstractArray,I for ii in eachindex(H.idxs) if i in H.idxs[ii] if typeof(H.idxs[ii]) <: Int - new_H = (new_H...,H.A[ii]) + new_H = (new_H...,H.A[ii]) else - error("cannot split operator") + error("cannot split operator") end end end @@ -98,4 +98,3 @@ reshape{A<:AbstractOperator}(L::A, idx::Vararg{Int}) = Reshape(L,idx) ###### jacobian ###### jacobian = Jacobian - diff --git a/src/utilities/block.jl b/src/utilities/block.jl new file mode 100644 index 0000000..c092bba --- /dev/null +++ b/src/utilities/block.jl @@ -0,0 +1,88 @@ +# Define block-arrays + +const RealOrComplex{R} = Union{R, Complex{R}} +const BlockArray{R} = Union{ + AbstractArray{C, N} where {C <: RealOrComplex{R}, N}, + Tuple{Vararg{AbstractArray{C, N} where {C <: RealOrComplex{R}, N}}} +} + +# Operations on block-arrays + +blocksize(x::Tuple) = blocksize.(x) +blocksize(x::AbstractArray) = size(x) + +blockeltype(x::Tuple) = blockeltype.(x) +blockeltype(x::AbstractArray) = eltype(x) + +blocklength(x::Tuple) = sum(blocklength.(x)) +blocklength(x::AbstractArray) = length(x) + +blockvecnorm(x::Tuple) = sqrt(blockvecdot(x, x)) +blockvecnorm{R <: Number}(x::AbstractArray{R}) = vecnorm(x) + +blockmaxabs(x::Tuple) = maximum(blockmaxabs.(x)) +blockmaxabs{R <: Number}(x::AbstractArray{R}) = maximum(abs, x) + +blocksimilar(x::Tuple) = blocksimilar.(x) +blocksimilar(x::AbstractArray) = similar(x) + +blockcopy(x::Tuple) = blockcopy.(x) +blockcopy(x::Array) = copy(x) + +blockcopy!(y::Tuple, x::Tuple) = blockcopy!.(y, x) +blockcopy!(y::AbstractArray, x::AbstractArray) = copy!(y, x) + +blockset!(y::Tuple, x) = blockset!.(y, x) +blockset!(y::AbstractArray, x) = (y .= x) + +blockvecdot{T <: Tuple}(x::T, y::T) = sum(blockvecdot.(x,y)) +blockvecdot{R <: Number}(x::AbstractArray{R}, y::AbstractArray{R}) = vecdot(x, y) + +blockzeros(t::Tuple, s::Tuple) = blockzeros.(t, s) +blockzeros(t::Type, n::NTuple{N, Integer} where {N}) = zeros(t, n) +blockzeros(t::Tuple) = blockzeros.(t) +blockzeros(n::NTuple{N, Integer} where {N}) = zeros(n) +blockzeros(n::Integer) = zeros(n) +blockzeros(a::AbstractArray) = zeros(a) + +blockaxpy!(z::Tuple, x, alpha::Real, y::Tuple) = blockaxpy!.(z, x, alpha, y) +blockaxpy!(z::AbstractArray, x, alpha::Real, y::AbstractArray) = (z .= x .+ alpha.*y) + +# Define broadcast + +import Base: broadcast! + +function broadcast!(f::Any, dest::Tuple, op1::Tuple) + for k = eachindex(dest) + broadcast!(f, dest[k], op1[k]) + end + return dest +end + +function broadcast!(f::Any, dest::Tuple, op1::Tuple, op2::Tuple) + for k = eachindex(dest) + broadcast!(f, dest[k], op1[k], op2[k]) + end + return dest +end + +function broadcast!(f::Any, dest::Tuple, coef::Number, op2::Tuple) + for k = eachindex(dest) + broadcast!(f, dest[k], coef, op2[k]) + end + return dest +end + +function broadcast!(f::Any, dest::Tuple, op1::Tuple, coef::Number, op2::Tuple) + for k = eachindex(dest) + broadcast!(f, dest[k], op1[k], coef, op2[k]) + end + return dest +end + +function broadcast!(f::Any, dest::Tuple, op1::Tuple, coef::Number, op2::Tuple, op3::Tuple) + for k = eachindex(dest) + broadcast!(f, dest[k], op1[k], coef, op2[k], op3[k]) + end + return dest +end diff --git a/src/utilities/deep.jl b/src/utilities/deep.jl deleted file mode 100644 index 275001b..0000000 --- a/src/utilities/deep.jl +++ /dev/null @@ -1,29 +0,0 @@ -# Generalized length, dot product, norm, similar and deepcopy for tuples - -deepsimilar(x::Tuple) = deepsimilar.(x) - -deepsimilar(x::AbstractArray) = similar(x) - -deepcopy!{T <: Tuple}(y::T, x::T) = deepcopy!.(y, x) - -deepcopy!{R <: Number}(y::AbstractArray{R}, x::AbstractArray{R}) = copy!(y, x) - -deeplength(x::Tuple) = sum(deeplength.(x)) - -deeplength(x::AbstractArray) = length(x) - -deepvecdot{T <: Tuple}(x::T, y::T) = sum(deepvecdot.(x,y)) - -deepvecdot{R <: Number}(x::AbstractArray{R}, y::AbstractArray{R}) = vecdot(x, y) - -deepvecnorm(x::Tuple) = sqrt(deepvecdot(x, x)) - -deepvecnorm{R <: Number}(x::AbstractArray{R}) = vecnorm(x) - -deepmaxabs(x::Tuple) = maximum(deepmaxabs.(x)) - -deepmaxabs{R <: Number}(x::AbstractArray{R}) = maximum(abs, x) - -deepzeros(t::Tuple, s::Tuple) = deepzeros.(t, s) - -deepzeros(t::Type, n::NTuple{N, Integer} where {N}) = zeros(t, n) diff --git a/test/runtests.jl b/test/runtests.jl index 55a4dbb..a2f4df2 100644 --- a/test/runtests.jl +++ b/test/runtests.jl @@ -9,9 +9,9 @@ verb = true @testset "AbstractOperators" begin -@testset "Tuple operations" begin - include("test_deep.jl") -end +#@testset "Tuple operations" begin +# include("test_deep.jl") +#end @testset "Linear operators" begin include("test_linear_operators.jl") @@ -26,22 +26,12 @@ end include("test_nonlinear_operators_calculus.jl") end -@testset "Syntax shorthands" begin - include("test_syntax.jl") -end - @testset "L-BFGS" begin include("test_lbfgs.jl") - include("test_lbfgs_larger.jl") end +@testset "Syntax shorthands" begin + include("test_syntax.jl") end - - - - - - - - +end diff --git a/test/test_lbfgs.jl b/test/test_lbfgs.jl index bcb4807..aafbb66 100644 --- a/test/test_lbfgs.jl +++ b/test/test_lbfgs.jl @@ -1,4 +1,6 @@ -# @printf("\nTesting L-BFGS routines\n") +@printf("\nTesting L-BFGS routines\n") + +function test_lbfgs() Q = [32.0000 13.1000 -4.9000 -3.0000 6.0000 2.2000 2.6000 3.4000 -1.9000 -7.5000; 13.1000 18.3000 -5.3000 -9.5000 3.0000 2.1000 3.9000 3.0000 -3.6000 -4.4000; @@ -33,32 +35,68 @@ dirs_ref = [ ] dirs = zeros(10, 5) # matrix of directions (to be filled in) -dir = zeros(10) # matrix of directions (to be filled in) mem = 3; -# col = 0; # last column of Sk, Yk that was filled in -# currmem = 0; -# H0 = 1.0 x = zeros(10) -A = LBFGS(Float64,size(x),mem) -A = LBFGS(x,mem) -println(A) -x_old = 0; -grad_old = 0; +H = LBFGS(x, mem) +dir = zeros(10) +println(H) + +HH = LBFGS((x, x), mem) +dirdir = (zeros(10), zeros(10)) +println(HH) + +x_old = []; +grad_old = []; for i = 1:5 + x = xs[:,i] grad = Q*x + q + if i > 1 - @time update!(A, x, x_old, grad, grad_old) - A_mul_B!(dir,A,grad) - else - copy!(dir, -grad) + @time update!(H, x, x_old, grad, grad_old) + @time update!(HH, (x, x), (x_old, x_old), (grad, grad), (grad_old, grad_old)) end - dirs[:, i] = copy(dir) + + dir_ref = dirs_ref[:,i] + + gradm = -grad + @time A_mul_B!(dir, H, gradm) + @test vecnorm(dir-dir_ref, Inf)/(1+vecnorm(dir_ref, Inf)) <= 1e-15 + + gradm2 = (-grad,-grad) + @time A_mul_B!(dirdir, HH, gradm2) + @test vecnorm(dirdir[1]-dir_ref, Inf)/(1+vecnorm(dir_ref, Inf)) <= 1e-15 + @test vecnorm(dirdir[2]-dir_ref, Inf)/(1+vecnorm(dir_ref, Inf)) <= 1e-15 + x_old = x; grad_old = grad; + +end + end -@test vecnorm(dirs-dirs_ref, Inf)/(1+vecnorm(dirs_ref, Inf)) <= 1e-15 +test_lbfgs() + +#test other constructors +mem = 3 +x = (zeros(10),zeros(Complex{Float64},10)) +H = LBFGS(x, mem) +println(H) + +dim = (10,) +H = LBFGS(dim, mem) +println(H) + +dim = ((10,),(10,)) +H = LBFGS(dim, mem) +println(H) + +D = (Float64,Complex{Float64}) +dim = ((10,),(10,)) +H = LBFGS(D,dim, mem) +println(H) + + diff --git a/test/test_lbfgs_larger.jl b/test/test_lbfgs_larger.jl deleted file mode 100644 index bd3b910..0000000 --- a/test/test_lbfgs_larger.jl +++ /dev/null @@ -1,72 +0,0 @@ -# @printf("\nTesting L-BFGS routines on a larger problem\n") - -function foo() - - n = 10000 - dens = 100/n - Q = sprandn(n, n, dens/2) - Q = 0.5*(Q+Q') + spdiagm(ones(n), 0) # this is spd - q = randn(n) - - N = 15 # number of steps - mem = 5; - x = zeros(n) - A = LBFGS(x,mem) - println(A) - println() - nh = round(Int64,n/2) - x2 = (zeros(nh),zeros(nh)) - A2 = LBFGS(x2,mem) - println(A2) - - x_old = randn(n) - x_old2 = (randn(nh),randn(nh)) - grad_old = randn(n) - grad_old2 = (randn(nh),randn(nh)) - grad = randn(n) - grad2 = (randn(nh),randn(nh)) - - dir = zeros(n) # matrix of directions (to be filled in) - dir2 = (zeros(round(Int64,n/2)),zeros(round(Int64,n/2))) # matrix of directions (to be filled in) - for i = 1:N - x = randn(n) - x2 = (x[1:nh],x[nh+1:end]) - grad = Q*x + q - grad2 = (grad[1:nh],grad[nh+1:end]) - if i > 1 - println("LBFGS update") - @time update!(A, x, x_old, grad, grad_old) - println("Tuple LBFGS update") - @time update!(A2, x2, x_old2, grad2, grad_old2) - println("LBFGS mul") - @time A_mul_B!(dir,A,grad) - println("Tuple LBFGS mul") - @time A_mul_B!(dir2,A2,grad2) - else - copy!(dir, -grad) - dir2 = deepcopy((-).(grad2)) - end - @assert vecdot(dir, grad) < 0 - x_old = x - x_old2 = deepcopy(x2) - grad_old = grad - grad_old2 = deepcopy(grad2) - end - @test norm(dir[1:nh]-dir2[1])<1e-3 - @test norm(dir[nh+1:end]-dir2[2])<1e-3 - - x2 = (randn(nh),randn(nh)+randn(nh)*im) - x_old2 = (randn(nh),randn(nh)+randn(nh)*im) - grad2 = (randn(nh),randn(nh)+randn(nh)*im) - grad_old2 = (randn(nh),randn(nh)+randn(nh)*im) - dir2 = (randn(nh),randn(nh)+randn(nh)*im) - A2 = LBFGS(x2,mem) - update!(A2, x2, x_old2, grad2, grad_old2) - #@code_warntype update!(A2, x2, x_old2, grad2, grad_old2) - @time update!(A2, x2, x_old2, grad2, grad_old2) - A_mul_B!(dir2,A2,grad2) - @time A_mul_B!(dir2,A2,grad2) - #@code_warntype update!(A2, x2, x_old2, grad2, grad_old2) -end - -foo() diff --git a/test/test_linear_operators_calculus.jl b/test/test_linear_operators_calculus.jl index 377084a..ca32d02 100644 --- a/test/test_linear_operators_calculus.jl +++ b/test/test_linear_operators_calculus.jl @@ -650,7 +650,7 @@ y = test_op(op, x, y0, verb) p = randperm(ndoms(op,2)) y2 = op[p]*x[p] -@test AbstractOperators.deepvecnorm(y .- y2) <= 1e-8 +@test AbstractOperators.blockvecnorm(y .- y2) <= 1e-8 # test Scale of Sum diff --git a/test/utils.jl b/test/utils.jl index eaa7b10..58b0f94 100644 --- a/test/utils.jl +++ b/test/utils.jl @@ -4,23 +4,23 @@ function test_op(A::AbstractOperator, x, y, verb::Bool = false) verb && (println(); show(A); println()) Ax = A*x - Ax2 = AbstractOperators.deepsimilar(Ax) + Ax2 = AbstractOperators.blocksimilar(Ax) verb && println("forward preallocated") A_mul_B!(Ax2, A, x) #verify in-place linear operator works verb && @time A_mul_B!(Ax2, A, x) - @test AbstractOperators.deepvecnorm(Ax .- Ax2) <= 1e-8 + @test AbstractOperators.blockvecnorm(Ax .- Ax2) <= 1e-8 Acy = A'*y - Acy2 = AbstractOperators.deepsimilar(Acy) + Acy2 = AbstractOperators.blocksimilar(Acy) verb && println("adjoint preallocated") Ac_mul_B!(Acy2, A, y) #verify in-place linear operator works verb && @time Ac_mul_B!(Acy2, A, y) - @test AbstractOperators.deepvecnorm(Acy .- Acy2) <= 1e-8 + @test AbstractOperators.blockvecnorm(Acy .- Acy2) <= 1e-8 - s1 = real(AbstractOperators.deepvecdot(Ax2, y)) - s2 = real(AbstractOperators.deepvecdot(x, Acy2)) + s1 = real(AbstractOperators.blockvecdot(Ax2, y)) + s2 = real(AbstractOperators.blockvecdot(x, Acy2)) @test abs( s1 - s2 ) < 1e-8 return Ax @@ -32,14 +32,14 @@ function test_NLop(A::AbstractOperator, x, y, verb::Bool = false) verb && (println(),println(A)) Ax = A*x - Ax2 = AbstractOperators.deepsimilar(Ax) + Ax2 = AbstractOperators.blocksimilar(Ax) verb && println("forward preallocated") A_mul_B!(Ax2, A, x) #verify in-place linear operator works verb && @time A_mul_B!(Ax2, A, x) @test_throws ErrorException A' - @test AbstractOperators.deepvecnorm(Ax .- Ax2) <= 1e-8 + @test AbstractOperators.blockvecnorm(Ax .- Ax2) <= 1e-8 J = Jacobian(A,x) verb && println(J) @@ -47,16 +47,16 @@ function test_NLop(A::AbstractOperator, x, y, verb::Bool = false) grad = J'*y A_mul_B!(Ax2, A, x) #redo forward verb && println("jacobian Ac_mul_B! preallocated") - grad2 = AbstractOperators.deepsimilar(grad) + grad2 = AbstractOperators.blocksimilar(grad) Ac_mul_B!(grad2, J, y) #verify in-place linear operator works verb && A_mul_B!(Ax2, A, x) #redo forward verb && @time Ac_mul_B!(grad2, J, y) - @test AbstractOperators.deepvecnorm(grad .- grad2) < 1e-8 + @test AbstractOperators.blockvecnorm(grad .- grad2) < 1e-8 grad3 = gradient_fd(A,Ax,x,y) #calculate gradient using finite differences - @test AbstractOperators.deepvecnorm(grad .- grad3) < 1e-4 + @test AbstractOperators.blockvecnorm(grad .- grad3) < 1e-4 return Ax, grad end