-
Notifications
You must be signed in to change notification settings - Fork 0
/
runRobustFusedLasso.py
244 lines (187 loc) · 8.23 KB
/
runRobustFusedLasso.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
'''
This file runs the experiments in
[1]: "Projective Splitting with Forward Steps only Requires Continuity", Patrick R. Johnstone, Jonathan Eckstein, https://arxiv.org/pdf/1809.07180.pdf
There are three experiments in the paper, distinguished by the loss exponent p.
To choose each experiment, pass the --power command-line argument as follows:
--power 1.3 for the first experiment
--power 1.5 for the second experiment
--power 1.7 for the third experiment
The following algorithms are run:
projective splitting (ps)
the subgradient method (subg)
the proximal subgradient method (prox-sg)
Tseng's method applied to the primal-dual inclusion (Tseng-pd)
These algorithms are defined in the file robustFusedLasso.py
'''
import numpy as np
import robustFusedLasso as fl
import time
from matplotlib import pyplot as plt
import sys
import argparse
# ---------------------------------------------------------------------------
# Command-line interface: every experiment knob is exposed as a flag; the
# defaults reproduce the d=2000, n=1000, p=1.3 configuration.
# ---------------------------------------------------------------------------
parser = argparse.ArgumentParser(description='Robust fused LASSO Experiment')
parser.add_argument('--dimension', type=int, default=2000, dest='dimension',
                    help='problem dimension/number of attributes',metavar='D')
parser.add_argument('--observations', type=int, default=1000, dest='observations',
                    help='number of observations/measurements',metavar='N')
parser.add_argument('--power', type=float, default=1.3, dest='power',
                    help='exponent in loss function, must be > 1',metavar='P')
parser.add_argument('--cutoff', type=int, default=4000, dest='cutoff',
                    help='truncate chart horizontal axis after C ' +
                    'matrix multiplications',metavar='C')
parser.add_argument('--lambda1', type=float, default=1.0, dest='lambda1',
                    help='fusion penalty',metavar='L1')
parser.add_argument('--lambda2', type=float, default=1.0, dest='lambda2',
                    help='LASSO penalty',metavar='L2')
# store_false: passing --nolegend sets args.legend to False (default is True,
# i.e. legends are drawn)
parser.add_argument('--nolegend', default=True, dest='legend',
                    action='store_false',help='omit chart legend')
args = parser.parse_args()
d = args.dimension #dimension of the problem
n = args.observations # number of measurements, i.e. rows of A
p = args.power # loss exponent, must be > 1 (validated below)
lam1 = args.lambda1 # fusion penalty weight (on ||diffOperator(x)||_1)
lam2 = args.lambda2 # LASSO penalty weight (on ||x||_1)
cutoff = args.cutoff
# Guard: refuse exponents <= 1 before generating any data.
if p <= 1:
    print("Loss function exponent " + str(p) + ' is too small')
    print("It should be greater than 1")
    sys.exit(1)
print(str(n) + ' x ' + str(d) + ' problem, exponent=' + str(p))
print("generating the random data...")
# A: n x d standard-Gaussian design matrix.
A = np.random.normal(0,1,[n,d])
print("normalizing A...")
# Center each column, then rescale each column to unit Euclidean norm.
# NOTE(review): sum(A) sums down the rows (length-d column sums), so
# sum(A)/d equals the column mean only when n == d; presumably sum(A)/n
# was intended — confirm against the original experiment code.
A = A - sum(A)/d
A = A.dot(np.diag(1/np.sqrt(sum(A**2))))
print("generating true signal...")
# Piecewise-constant ground-truth signal: d/width groups, k of them nonzero.
k = 10 # number of nonzero groups
width = int(d/40) # size of each group
minLevel = -5
maxLevel = 5
# NOTE: np.random.randint's upper bound is exclusive, so levels lie in [-5, 4].
xtest = np.random.randint(minLevel,maxLevel,k) # constant val of each group, integer between -5 and 5
xtest = np.concatenate([xtest,np.zeros(int(d/width) - k)])
np.random.shuffle(xtest)
# Expand each group level into a constant run of length `width`, then flatten.
Xtst = np.array([np.ones(width)*xtest[i] for i in range(len(xtest))])
xtest = Xtst.reshape(d)
print("generating noise with outliers...")
p_in = 0.9 # probability will be inlier
sigma_out = 5.0 #outlier standard deviation
outlier_mask = 1.0*(np.random.rand(n)>p_in) # selecting which measurments will be outliers w.p. 10%
epsin = np.random.normal(0,1,n) # inlier noise
epsout = np.random.normal(0,sigma_out,n) #outlier noise
print("generating measurements...")
# b = A xtest + noise; each entry takes outlier noise where masked, inlier otherwise.
b = A.dot(xtest)+ outlier_mask*epsout + (1-outlier_mask)*epsin
# define the functions used in each algorithm
def theGrad(x):
    """Gradient of the smooth loss (1/p)*||A x - b||_p^p at x.

    Uses the closed form A^T (sign(r) * |r|^(p-1)) with residual
    r = A x - b.  Reads the module-level A, b and p.
    """
    residual = A.dot(x) - b
    signed_powers = np.sign(residual) * abs(residual) ** (p - 1)
    return A.T.dot(signed_powers)
def theSubGrad(x):
    """Return an elementwise subgradient of the ell_1 norm at x.

    np.sign yields -1, 0 or +1 per coordinate; the 0 chosen at
    coordinates where x is zero is a valid element of the
    subdifferential [-1, 1].
    """
    subgrad = np.sign(x)
    return subgrad
def theProx(x,rholam):
    # prox of the ell_1 norm, delegated to the helper in robustFusedLasso;
    # rholam is the threshold level (presumably stepsize * penalty weight —
    # see fl.proxL1 for the exact convention)
    return fl.proxL1(x,rholam)
def theFunc(x):
    """Full robust fused-LASSO objective evaluated at x.

    Sum of three terms: lam1 * ||D x||_1 with D = fl.diffOperator (fusion
    penalty), (1/p) * ||A x - b||_p^p (robust data-fitting loss), and
    lam2 * ||x||_1 (sparsity penalty).  Reads module-level A, b, p,
    lam1 and lam2.
    """
    fusion = lam1 * np.linalg.norm(fl.diffOperator(x), 1)
    loss = (p ** (-1)) * np.linalg.norm(A.dot(x) - b, p) ** p
    sparsity = lam2 * np.linalg.norm(x, 1)
    return fusion + loss + sparsity
def theProxL1star(x,thresh):
    # projection onto the ell_infinity ball (delegated to fl.projLInf,
    # presumably with radius `thresh` — see robustFusedLasso.py)
    # this is the prox of the conjugate function of the ell_1 norm and is used in Tseng-pd
    return fl.projLInf(x, thresh)
# ---------------------------------------------------------------------------
# Run the four algorithms.  Each call returns its objective-value history plus
# bookkeeping (final iterate, matrix-multiply counts, cumulative times) that
# the plotting sections below consume.
# ---------------------------------------------------------------------------
numIter = 2000  # number of iterations, same for each algorithm
                # (renamed from `iter` so the builtin iter() is not shadowed)
print("running projective splitting (ps)...")
Delta = 1e0
gamma = 1e0
rho1_ps = 1e0   # stepsizes passed to fl.ps
rho2_ps = 1e0
rho3_ps = 1e0
rhoIncrease = 1.0  # NOTE(review): unused in this script; kept for reference
t_ps = time.time()
[fx2_ps,x2_ps,mults_ps,times_ps] = \
    fl.ps(d,numIter,rho1_ps,rho2_ps,rho3_ps,theProx,theGrad,theFunc,gamma,lam1,lam2,Delta)
print("ps running time: "+str(time.time()-t_ps))

print("running the subgradient method (subg)...")
alpha_0 = 1e0  # initial stepsize (decay behavior governed by r — see fl.subg)
r = 1.0
tsubgstart = time.time()
iterSG = 2*numIter #subg iterations only require two matrix multiplies by A rather
                   #than 4 as in ps and tseng-pd, so we allow twice as many total iterations
[x_sg,Fx_sg,times_sg] = fl.subg(d,iterSG,theGrad,theSubGrad,lam1,lam2,alpha_0,r,theFunc)
tsubgend = time.time()
print("subg running time: "+str(tsubgend-tsubgstart))

print("running the proximal subgradient method (prox-sg)")
alpha_0 = 1e0  # same stepsize parameters as subg
r = 1.0
t_proxSG_start = time.time()
[x_proxSG,Fx_proxSG,times_proxsg] = fl.proxSubG(d,iterSG,theGrad,theProx,alpha_0,r,lam2,theFunc,lam1,theSubGrad)
t_proxSG_end = time.time()
print("prox SG running time: "+str(t_proxSG_end-t_proxSG_start))

print("running Tseng's method (tseng-pd)")
#### Tseng-PD ####
gamma = 1e2   # primal-dual scaling (note: ps above used gamma = 1e0)
alpha = 1e0   # initial stepsize
theta = 0.99  # presumably the backtracking shrink factor — see fl.tseng_pd
t_tg_prod_start = time.time()
[x_tg_prod,F_x_tg_prod,mults_tg_prod,times_tgprod] = fl.tseng_pd(d,numIter,theGrad,alpha,theProxL1star,theta,theFunc,lam1,lam2,gamma)
t_tg_prod_end = time.time()
print("Tseng prod running time: "+str(t_tg_prod_end-t_tg_prod_start))
print("Plotting results...")
# ---- chart styling shared by all plots below ----
ourTick = 20  # tick-label font size
plt.rc('xtick',labelsize=ourTick)
plt.rc('ytick',labelsize=ourTick)
ax = plt.gca()
ourFont = 20      # axis-label/legend font size
ourMark = 500     # marker spacing (markevery) for the semilog plots
ourMarkSize = 15  # marker size for the semilog plots
ax.tick_params(width=3,length=10)
plotXvals = True  # set False to skip the signal-recovery plot
if(plotXvals):
    print("plotting the true and recovered signals")
    plt.plot(xtest) # true signal
    plt.plot(x2_ps) # result returned by ps
    plt.legend(['true signal','ps recovered signal'])
    plt.show()
# Estimate the optimal value as the smallest objective value attained by any
# method.  min-of-mins is used instead of concatenating the histories with
# `+`, which is only concatenation for Python lists — for numpy arrays `+`
# is elementwise addition (and errors on mismatched lengths).  For lists the
# two forms give the same answer.
opt = min(min(fx2_ps), min(Fx_sg), min(F_x_tg_prod), min(Fx_proxSG))
rawPlots = True  # set False to skip the raw function-value plot
if(rawPlots):
    print("plotting the function values vs number of multiplies")
    plt_range = 100  # only plot the first plt_range iterations
    # ps and tseng-pd report their own matrix-multiply counts; subg and
    # prox-sg cost 2 multiplies per iteration, hence the stride-2 ranges.
    plt.plot(mults_ps[0:plt_range],fx2_ps[0:plt_range])
    plt.plot(range(0,2*plt_range+2,2),Fx_sg[0:plt_range+1],'s-')
    plt.plot(range(0,2*plt_range+2,2),Fx_proxSG[0:plt_range+1],'o-')
    plt.plot(mults_tg_prod[0:plt_range],F_x_tg_prod[0:plt_range],'v-')
    plt.plot(opt*np.ones(plt_range))  # horizontal line at the estimated optimum
    plt.legend(['ps', 'subg','prox-sg','tseng-pd'])
    #plt.legend(bbox_to_anchor=[1, -1],
    #           bbox_transform=plt.gcf().transFigure)
    plt.title('function values')
    plt.show()
logPlots = True  # set False to skip the relative-error semilog plot
if(logPlots):
    print("plotting relative error of function values on a semi-log plot vs number of matrix multiplies")
    # Relative objective gap (F(x) - opt) / opt vs cumulative matrix multiplies.
    plt.semilogy(mults_ps,(fx2_ps-opt)/opt)
    plt.semilogy(mults_tg_prod, (F_x_tg_prod - opt) / opt,
                 's-',markevery = int(0.5*ourMark),markersize=ourMarkSize)
    plt.semilogy(range(0,2*iterSG+2,2),(Fx_sg - opt)/opt,
                 'o-',markevery = ourMark,markersize=ourMarkSize) #number of multiplies per iteration is 2 for subg and prox subg
    plt.semilogy(range(0,2*iterSG+2,2),(Fx_proxSG - opt)/opt,
                 'v-',markevery = ourMark,markersize=ourMarkSize)
    ax = plt.gca()
    ax.set_xlim(left=0,right=cutoff)  # truncate x-axis at --cutoff multiplies
    if args.legend:  # suppressed by --nolegend
        plt.legend(['ps','tseng-pd','sg','prox-sg'],fontsize=ourFont,loc='lower left')
    plt.xlabel('matrix multiplications',fontsize=ourFont)
    plt.ylabel('relative objective gap',fontsize=ourFont)
    plt.show()
time_plts = True  # set False to skip the wall-clock comparison plot
if(time_plts):
    print("plotting relative error of function values on a semi-log plot vs cumulative time")
    # Same relative gap as the previous plot, but against cumulative running time.
    plt.semilogy(times_ps,(fx2_ps-opt)/opt)
    plt.semilogy(times_tgprod, (F_x_tg_prod - opt) / opt)
    plt.semilogy(times_sg,(Fx_sg - opt)/opt)
    plt.semilogy(times_proxsg,(Fx_proxSG - opt)/opt)
    plt.legend(['ps','tseng-pd','sg','prox-sg'],fontsize=ourFont)
    plt.xlabel('running time (s)',fontsize=ourFont)
    plt.ylabel('relative objective opt gap',fontsize=ourFont)
    plt.show()