-
Notifications
You must be signed in to change notification settings - Fork 0
/
Newton_optim.py
251 lines (225 loc) · 8.68 KB
/
Newton_optim.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
import numpy as np
import sympy as sym
import matplotlib.pyplot as plt
class optimizer(object):
    '''
    Newton-method optimizer with two objective modes selected by ``f_mode``:

    * ``'f'``        : the fixed 2-variable objective implemented in ``f``/``g``
                       (analytic gradient, symbolic Hessian).
    * anything else  : an N-variable Rosenbrock-style objective built
                       symbolically in ``unknown_f`` for arbitrary dimension.
    '''
    def __init__(self,f_mode='f',rph=0.1,init_alpha=1,t=2):
        # f_mode selects the objective mode (see class docstring).
        # rph / init_alpha / t are stored but never read by the Newton
        # iterations below — presumably line-search parameters left over
        # from another method; TODO confirm before removing.
        self.f_mode = f_mode
        self.rph = rph
        self.init_alpha = init_alpha
        self.t = t
    def Hessian(self,x,unknown=False):
        '''
        Compute the Hessian matrix of the objective at the numeric point ``x``.

        :param x: array-like of length N, the evaluation point
        :param unknown: if True use the variable-dimension objective from
                        ``unknown_f``; otherwise differentiate ``self.f``
        :return: the N x N Hessian as a float64 numpy array
        '''
        lens = len(x)
        # sympy's 'x:K' shorthand makes symbols x0..x(K-1); ask for N+1
        # and drop x0 so the symbol names run x1..xN.
        s = 'x:'+str(lens+1)
        symp_x = sym.symbols(s)
        # print(symp_x)
        symp_x = symp_x[1:]
        # subs dict mapping each symbol to its numeric value for evalf below.
        match_dic = {}
        for i in range(lens):
            match_dic[symp_x[i]] = x[i]
        if unknown:
            fx,npfx = self.unknown_f(symp_x,x)
        else:
            fx = self.f(symp_x)
        # Second partial derivatives, evaluated entrywise at x.
        H = sym.zeros(lens,lens)
        for j,r in enumerate(symp_x):
            for k , l in enumerate(symp_x):
                H[j,k] = sym.diff(sym.diff(fx,r),l).evalf(subs=match_dic)
        return np.array(H.tolist()).astype(np.float64)
    def get_symx(self,lens):
        '''
        Return the tuple of sympy symbols (x1, ..., xN) for ``lens`` variables.
        '''
        result_x = 'x:'+str(lens+1)
        result_x = sym.symbols(result_x)
        return result_x[1:]
    def unknown_f(self,x,npx):
        '''
        Build the symbolic N-variable objective (a Rosenbrock-type sum of
        (1-x_i)^2 + 100 (x_{i+1} - x_i^2)^2 terms) and evaluate it at ``npx``.

        :param x: tuple of sympy symbols for the variables
        :param npx: numpy array with the numeric variable values
        :return fx: symbolic expression of the function
        :return npfx: numeric function value at ``npx``
        '''
        lens = len(x)
        results = []
        match_dic = {}
        for i in range(lens-1):
            results.append((1-x[i])**2 + 100*(x[i+1]-x[i]**2)**2)
            match_dic[x[i]] = npx[i]# build the subs dict assigning npx values to the symbols in x
        match_dic[x[lens-1]] = npx[lens-1]
        # Sum the per-index terms into one symbolic expression.
        fx = results[0]
        for i in range(1,lens-1):
            fx = fx + results[i]
        npfx = float(fx.evalf(subs=match_dic))# substitute the numeric values to obtain the function value
        return fx,npfx
    def unknown_g(self,x,npx,fx):
        '''
        Evaluate the gradient of the symbolic expression ``fx`` at ``npx``.

        :param x: tuple of sympy symbols for the variables
        :param npx: numpy array with the numeric variable values
        :param fx: symbolic expression of the function
        :return: numeric gradient as a numpy array
        '''
        lens = len(x)
        grad_results = []
        match_dic = {}
        for i in range(lens):
            grad_results.append(sym.diff(fx,x[i]))
            match_dic[x[i]] = npx[i]# build the subs dict assigning npx values to the symbols in x
        np_results = []
        for i in range(lens):
            np_results.append(float(grad_results[i].evalf(subs=match_dic)))
        return np.array(np_results)
    def f(self,x):
        '''
        Numeric value of the fixed 2-variable objective (problem 4):
        (x1^3 - x2)^2 + 2 (x2 - x1)^4.
        '''
        x1 ,x2 = x[0],x[1]
        # print('x1:',x1,x2)
        return (x1**3 - x2)**2 + 2*(x2 - x1)**4
    def g(self,x):
        '''
        Analytic gradient of the fixed 2-variable objective (problem 4).
        '''
        x1 ,x2 = x[0],x[1]
        g1 = 2*(x1**3 - x2)*3*x1**2 - 8*(x2-x1)**3
        g2 = 8 *(x2 - x1)**3 - 2*(x1**3 - x2)
        results = np.array([g1,g2])
        return results
    def newton_opt(self,x0,epoches=1000,eps=1e-6,init_alpha = 1):
        '''
        Plain Newton iteration on the fixed 2-variable objective.

        :param x0: starting point (length-2 array)
        :param epoches: maximum number of iterations
        :param eps: stop once the squared gradient norm falls below this
        :param init_alpha: unused (pure Newton step, no line search)
        :return: (final x, iteration count, function-value history,
                  gradient history), histories truncated to iterations done
        '''
        lens = len(x0)
        fx0 = self.f(x0)
        grad0 = self.g(x0)
        delta = np.sum(grad0**2)
        i = 1
        x = x0
        record_f = np.zeros((1,epoches))# function values over the iterations
        record_g = np.zeros((lens,epoches))# gradient values over the iterations
        record_f[0,0] = fx0
        record_g[:,0] = grad0
        while(i < epoches and delta >eps):
            grad = self.g(x)
            Gx = self.Hessian(x)
            Gx_reverse = np.linalg.inv(Gx)
            # Newton direction dk = -H^{-1} g.
            dk = np.dot(-Gx_reverse,grad)
            x = x + dk
            record_f[0,i] = self.f(x)
            record_g[:,i] = self.g(x)
            i = i + 1
            # NOTE(review): delta uses the gradient at the PRE-step point,
            # so the stopping test lags one iteration behind the iterate.
            delta = np.sum(grad**2)
        return x,i,record_f[:,:i],record_g[:,:i]
    def unknownf_newton(self,x0,epoches=1000,eps=1e-6,init_alpha = 1):
        '''
        Newton optimization for the variable-dimension symbolic objective.

        :param x0: starting point (length-N array)
        :param epoches: maximum number of iterations
        :param eps: stop once the squared gradient norm falls below this
        :param init_alpha: unused (pure Newton step, no line search)
        :return: (final x, iteration count, function-value history,
                  gradient history, iterate history, gradient-norm history)
        '''
        lens = len(x0)
        symx = self.get_symx(lens)# sympy symbols for the N variables
        sym_fx,npfx = self.unknown_f(symx,x0)# symbolic expression and value at x0
        grad0 = self.unknown_g(symx,x0,sym_fx)# numeric gradient at x0
        delta = np.sum(grad0**2)
        i = 1
        x = x0
        record_f = np.zeros((1,epoches))# function values
        record_g = np.zeros((lens,epoches))# gradient values
        record_x = np.zeros((lens,epoches))# iterates visited during optimization
        record_normG = np.zeros((1,epoches))# gradient norms
        record_f[0,0] = npfx
        record_g[:,0] = grad0
        record_x[:,0] = x
        record_normG[0,0] = np.linalg.norm(grad0)
        while(i < epoches and delta >eps):
            grad = self.unknown_g(symx,x,sym_fx)
            result_x = x
            Gx = self.Hessian(x,unknown=True)
            Gx_reverse = np.linalg.inv(Gx)
            # Newton direction dk = -H^{-1} g.
            dk = np.dot(-Gx_reverse,grad)
            x = x + dk
            record_x[:,i] = x
            sym_fx,npfx = self.unknown_f(symx,x)
            record_f[0,i] = npfx
            # Recompute the gradient at the NEW point for the records.
            grad = self.unknown_g(symx,x,sym_fx)
            record_g[:,i] = grad
            record_normG[0,i] = np.linalg.norm(grad)
            i = i + 1
            delta = np.sum(grad**2)
        return x,i,record_f[:,:i],record_g[:,:i],record_x[:,:i],record_normG[:,:i]
    def __call__(self,x0):
        '''
        Dispatch on ``f_mode``: run ``newton_opt`` for the fixed 2-variable
        objective, otherwise ``unknownf_newton``. Return the corresponding
        result tuple (4 items for 'f' mode, 6 items otherwise).
        '''
        if self.f_mode == 'f':
            final_x,epoches,records_f,records_g = self.newton_opt(x0)
            return final_x,epoches,records_f,records_g
        else:
            final_x,epoches,records_f,records_g,records_x ,record_normG= self.unknownf_newton(x0)
            return final_x,epoches,records_f,records_g,records_x,record_normG
if __name__ == '__main__':
    # Problem 4: fixed 2-variable objective with analytic gradient.
    x0 = np.array([2.,3.])
    opts = optimizer()
    final_x,epoches,records_f,records_g = opts(x0)# invokes optimizer.__call__
    print('第四题最优点和最优值分别为:',final_x,records_f[:,-1])
    # Problem 5, N=2: variable-dimension symbolic objective.
    lens = 2
    x0 = np.zeros((lens,))
    x0[0] = 1
    opts = optimizer(f_mode='unknown')
    final_x,epoches,records_f,records_g,records_x,record_normG = opts(x0)
    print('第五题N=2时最优点和最优值分别为:',final_x,records_f[:,-1])
    # Plot the contour lines and the sequence of iterates.
    def height(x, y):
        # 2-D Rosenbrock surface used for the contour plot (matches the
        # N=2 case of optimizer.unknown_f).
        return (1-x)**2 +100 *(y-x**2)**2
        # return (1 - x / 2 + x ** 5 + y ** 3) * np.exp(-x ** 2 - y ** 2)
    x = np.linspace(-2, 2, 100)
    y = np.linspace(-2, 2, 100)
    X, Y = np.meshgrid(x, y)
    # Fill the contour plot with colors, split into 20 levels by height.
    plt.contourf(X, Y, height(X, Y), 20, alpha=0.75, cmap=plt.cm.hot)
    x1 = records_x[0,:]
    x2 = records_x[1,:]
    plt.plot(x1[0],x2[0],'*')# starting point
    plt.plot(x1[1:-2],x2[1:-2],'o')# intermediate iterates
    plt.plot(x1[-1],x2[-1],'s')# final point
    plt.plot(x1,x2,linewidth=1,color='red')
    plt.title('contour line',fontsize=30)
    plt.show()
    # Problem 5, N=7: solve and plot function values and gradient norms.
    lens = 7
    x0 = np.zeros((lens,))
    x0[0] = 1
    opts = optimizer(f_mode='unknown')
    final_x,epoches,records_f,records_g,records_x,record_normG = opts(x0)
    print('第五题N=7时最优点和最优值分别为:',final_x,records_f[:,-1])
    # Cap the plotted range at 500 iterations.
    if epoches > 500:
        epoches = 500
    epos = np.linspace(0,epoches,epoches)# x-axis: iteration index
    plt.subplot(1,2,1)
    plt.plot(epos,records_f[0,:epoches],label='function values')
    plt.xlabel('epoch')
    plt.ylabel('function values')
    plt.text(-0.5,3,"function values",fontsize=20,color="red")
    plt.subplot(1,2,2)
    plt.plot(epos,record_normG[0,:epoches],label='function gradients')
    plt.xlabel('epoch')
    plt.ylabel('function gradients')
    plt.text(-0.5,3,"grad_descend",fontsize=20,color="red")
    plt.show()
    # Problem 5, N=20: solve and plot function values and gradient norms.
    lens = 20
    x0 = np.zeros((lens,))
    x0[0] = 1
    opts = optimizer(f_mode='unknown')
    final_x,epoches,records_f,records_g,records_x,record_normG = opts(x0)
    print('第五题N=20时最优点和最优值分别为:',final_x,records_f[:,-1])
    # Cap the plotted range at 500 iterations.
    if epoches > 500:
        epoches = 500
    epos = np.linspace(0,epoches,epoches)# x-axis: iteration index
    plt.subplot(1,2,1)
    plt.plot(epos,records_f[0,:epoches],label='function values')
    plt.xlabel('epoch')
    plt.ylabel('function values')
    plt.text(-0.5,3,"function values",fontsize=20,color="red")
    plt.subplot(1,2,2)
    plt.plot(epos,record_normG[0,:epoches],label='function gradients')
    plt.xlabel('epoch')
    plt.ylabel('function gradients')
    plt.text(-0.5,3,"grad_descend",fontsize=20,color="red")
    plt.show()