# Logistic regression with L2 regularization
# notation:
# X = feature matrix, w = solution, y = labels in {-1,+1}
# f = objective, g = gradient, f' = the (non-linear) part of the gradient
# nMM = number of matrix-vector multiplications, z = Xw, k = constants
# addComp = additive component for the f' evaluation in the version optimized
# for linear composition problems. It has the same dimension as w, and the
# optimizer code adds it to account for any regularization term; see the
# directional derivative calculation in Wolfe in linesearch.jl for an example.
include("misc.jl")
using LinearAlgebra # for norm, diagm, and I (misc.jl may already load this)
# objective evaluation for linear composition problems
function objLinear(z,w,y;k=nothing,lambda=1.0)
    nMM = 0
    if k === nothing
        f = sum(log.(1 .+ exp.(-y.*z)))
    else
        f = sum(log.(1 .+ exp.(-y.*(z.+k))))
    end
    f += 0.5*lambda*norm(w)^2 # squared L2 penalty, matching addComp = lambda*w below
    return (f,nMM)
end
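# Usage sketch (illustrative only, not part of the original interface): the
# objective is evaluated from the precomputed product z = Xw so the matrix-vector
# multiplication can be counted and reused by the optimizer. The data, shapes,
# and lambda below are made up.
#
#   X = randn(100,5); y = rand([-1.0,1.0],100); w = zeros(5)
#   z = X*w
#   (f,nMM) = objLinear(z,w,y,lambda=1.0)  # f == 100*log(2) when w = 0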
# f' evaluation for linear composition problems
# arguments for gradient wrt iterates (outer loop):
# z = Xw --> g(w) = X^T * fPrime + addComp
# arguments for gradient wrt stepsizes (inner loop):
# z = XDt, w=t, k = Xw --> g(t) = (XD)^T * fPrime + addComp
function fPrimeLinear(z,w,y;k=nothing,lambda=1.0)
    nMM = 0
    if k === nothing
        yz = y.*z
    else
        yz = y.*(z.+k)
    end
    fPrime = -y./(1 .+ exp.(yz))
    addComp = lambda*w # gradient of the 0.5*lambda*norm(w)^2 penalty
    return (fPrime,addComp,nMM)
end
# gradient evaluation for linear composition problems
function gradLinear(z,w,X,y;k=nothing)
    fPrime,addComp,nMM = fPrimeLinear(z,w,y,k=k)
    g = X'*fPrime .+ addComp; nMM += 1
    return (g,nMM)
end
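# Sanity-check sketch (illustrative, not part of the original file): gradLinear
# assembles g(w) = X'*fPrime(Xw) + lambda*w, which can be compared against a
# centered finite difference of objLinear along a random direction d (reusing
# X, y, w from the sketch above).
#
#   d = randn(5); h = 1e-6
#   (fp,_) = objLinear(X*(w.+h*d),w.+h*d,y)
#   (fm,_) = objLinear(X*(w.-h*d),w.-h*d,y)
#   (g,_)  = gradLinear(X*w,w,X,y)
#   (fp - fm)/(2h)  # should be close to dot(g,d)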
# f'' evaluation for linear composition problems
function fDoublePrimeLinear(z,w,X,y;k=nothing)
    nMM = 0
    if k === nothing
        yz = y.*z
    else
        yz = y.*(z.+k)
    end
    sig = 1 ./ (1 .+ exp.(-yz)) # sigma(y_i*z_i)
    # the logistic loss separates over the entries of z, so its Hessian wrt z is diagonal
    fPrimePrime = diagm(vec(sig.*(1 .- sig)))
    return (fPrimePrime,nMM)
end
function hessianLinear(z,w,X,y;k=nothing,lambda=1.0)
    (m,n) = size(X)
    fDoublePrime,nMM = fDoublePrimeLinear(z,w,X,y,k=k)
    H = X'*fDoublePrime*X + lambda*Matrix(I(n)); nMM += 2 # the penalty 0.5*lambda*norm(w)^2 contributes lambda*I
    return (H,nMM)
end
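# Newton-step sketch (illustrative, not part of the original file): with the
# gradient and Hessian above, one (undamped) Newton update on the regularized
# objective could look as follows, reusing X, y, w from the sketches above.
# A line search would normally choose the step size.
#
#   z = X*w
#   (g,_) = gradLinear(z,w,X,y)
#   (H,_) = hessianLinear(z,w,X,y)
#   w = w - H\g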
# objective and gradient evaluations for minFuncNonOpt to minimize f wrt t
function objAndGrad(t,D,w,X,y)
    nMM = 0
    w_new = w + D*t
    z = y.*(X*w_new); nMM += 1
    f = sum(log.(1 .+ exp.(-z)))
    g = -(X*D)'*(y./(1 .+ exp.(z))); nMM += 1 # the regularization term is omitted in this version
    return (f,g,nMM)
end
# objective and gradient evaluations for minFuncNonOpt to minimize f wrt w
function objAndGrad(w,X,y)
    nMM = 0
    yXw = y.*(X*w); nMM += 1
    f = sum(log.(1 .+ exp.(-yXw))) + 0.5*norm(w)^2 # squared L2 penalty (lambda = 1), matching g below
    g = -X'*(y./(1 .+ exp.(yXw))) .+ w; nMM += 1
    return (f,g,nMM)
end
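# Usage sketch (illustrative, not part of the original file): a few plain
# gradient-descent steps on the regularized objective using the w-version of
# objAndGrad. The step size and the data shapes are made up.
#
#   w = zeros(5)
#   for iter in 1:10
#       (f,g,_) = objAndGrad(w,X,y)
#       w -= 1e-3*g
#   end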
# function value and zeros for the gradient for minFuncNonOpt
# (called by lsArmijoNonOpt, which does not use the gradient)
function objAndNoGrad(w,X,y)
    nMM = 0
    (m,n) = size(X)
    yXw = y.*(X*w); nMM += 1
    f = sum(log.(1 .+ exp.(-yXw))) + 0.5*norm(w)^2 # squared L2 penalty (lambda = 1)
    g = zeros(n,1) # placeholder with the same dimension as w
    return (f,g,nMM)
end
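# Consistency sketch (illustrative, not part of the original file): objAndNoGrad
# should return the same objective value as objAndGrad, with a zero placeholder
# in place of the gradient, so line searches that only need function values can
# use either one.
#
#   (f1,g1,_) = objAndGrad(w,X,y)
#   (f2,g2,_) = objAndNoGrad(w,X,y)
#   f1 ≈ f2          # true
#   all(g2 .== 0)    # true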