forked from gonum/gonum
-
Notifications
You must be signed in to change notification settings - Fork 0
/
dlaqps.go
244 lines (216 loc) · 5.77 KB
/
dlaqps.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
// Copyright ©2017 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package gonum
import (
"math"
"gonum.org/v1/gonum/blas"
"gonum.org/v1/gonum/blas/blas64"
)
// Dlaqps computes a step of QR factorization with column pivoting
// of an m×n matrix A by using Blas-3. It tries to factorize nb
// columns from A starting from the row offset, and updates all
// of the matrix with Dgemm.
//
// In some cases, due to catastrophic cancellations, it cannot
// factorize nb columns. Hence, the actual number of factorized
// columns is returned in kb.
//
// Dlaqps computes a QR factorization with column pivoting of the
// block A[offset:m, 0:nb] of the m×n matrix A. The block
// A[0:offset, 0:n] is accordingly pivoted, but not factorized.
//
// On exit, the upper triangle of block A[offset:m, 0:kb] is the
// triangular factor obtained. The elements in block A[offset:m, 0:n]
// below the diagonal, together with tau, represent the orthogonal
// matrix Q as a product of elementary reflectors.
//
// offset is number of rows of the matrix A that must be pivoted but
// not factorized. offset must not be negative otherwise Dlaqps will panic.
//
// On exit, jpvt holds the permutation that was applied; the jth column
// of A*P was the jpvt[j] column of A. jpvt must have length n,
// otherwise Dlapqs will panic.
//
// On exit tau holds the scalar factors of the elementary reflectors.
// It must have length nb, otherwise Dlapqs will panic.
//
// vn1 and vn2 hold the partial and complete column norms respectively.
// They must have length n, otherwise Dlapqs will panic.
//
// auxv must have length nb, otherwise Dlaqps will panic.
//
// f and ldf represent an n×nb matrix F that is overwritten during the
// call.
//
// Dlaqps is an internal routine. It is exported for testing purposes.
func (impl Implementation) Dlaqps(m, n, offset, nb int, a []float64, lda int, jpvt []int, tau, vn1, vn2, auxv, f []float64, ldf int) (kb int) {
switch {
case m < 0:
panic(mLT0)
case n < 0:
panic(nLT0)
case offset < 0:
panic(offsetLT0)
case offset > m:
panic(offsetGTM)
case nb < 0:
panic(nbLT0)
case nb > n:
panic(nbGTN)
case lda < max(1, n):
panic(badLdA)
case ldf < max(1, nb):
panic(badLdF)
}
if m == 0 || n == 0 {
return 0
}
switch {
case len(a) < (m-1)*lda+n:
panic(shortA)
case len(jpvt) != n:
panic(badLenJpvt)
case len(vn1) < n:
panic(shortVn1)
case len(vn2) < n:
panic(shortVn2)
}
if nb == 0 {
return 0
}
switch {
case len(tau) < nb:
panic(shortTau)
case len(auxv) < nb:
panic(shortAuxv)
case len(f) < (n-1)*ldf+nb:
panic(shortF)
}
if offset == m {
return 0
}
lastrk := min(m, n+offset)
lsticc := -1
tol3z := math.Sqrt(dlamchE)
bi := blas64.Implementation()
var k, rk int
for ; k < nb && lsticc == -1; k++ {
rk = offset + k
// Determine kth pivot column and swap if necessary.
p := k + bi.Idamax(n-k, vn1[k:], 1)
if p != k {
bi.Dswap(m, a[p:], lda, a[k:], lda)
bi.Dswap(k, f[p*ldf:], 1, f[k*ldf:], 1)
jpvt[p], jpvt[k] = jpvt[k], jpvt[p]
vn1[p] = vn1[k]
vn2[p] = vn2[k]
}
// Apply previous Householder reflectors to column K:
//
// A[rk:m, k] = A[rk:m, k] - A[rk:m, 0:k-1]*F[k, 0:k-1]ᵀ.
if k > 0 {
bi.Dgemv(blas.NoTrans, m-rk, k, -1,
a[rk*lda:], lda,
f[k*ldf:], 1,
1,
a[rk*lda+k:], lda)
}
// Generate elementary reflector H_k.
if rk < m-1 {
a[rk*lda+k], tau[k] = impl.Dlarfg(m-rk, a[rk*lda+k], a[(rk+1)*lda+k:], lda)
} else {
tau[k] = 0
}
akk := a[rk*lda+k]
a[rk*lda+k] = 1
// Compute kth column of F:
//
// Compute F[k+1:n, k] = tau[k]*A[rk:m, k+1:n]ᵀ*A[rk:m, k].
if k < n-1 {
bi.Dgemv(blas.Trans, m-rk, n-k-1, tau[k],
a[rk*lda+k+1:], lda,
a[rk*lda+k:], lda,
0,
f[(k+1)*ldf+k:], ldf)
}
// Padding F[0:k, k] with zeros.
for j := 0; j < k; j++ {
f[j*ldf+k] = 0
}
// Incremental updating of F:
//
// F[0:n, k] := F[0:n, k] - tau[k]*F[0:n, 0:k-1]*A[rk:m, 0:k-1]ᵀ*A[rk:m,k].
if k > 0 {
bi.Dgemv(blas.Trans, m-rk, k, -tau[k],
a[rk*lda:], lda,
a[rk*lda+k:], lda,
0,
auxv, 1)
bi.Dgemv(blas.NoTrans, n, k, 1,
f, ldf,
auxv, 1,
1,
f[k:], ldf)
}
// Update the current row of A:
//
// A[rk, k+1:n] = A[rk, k+1:n] - A[rk, 0:k]*F[k+1:n, 0:k]ᵀ.
if k < n-1 {
bi.Dgemv(blas.NoTrans, n-k-1, k+1, -1,
f[(k+1)*ldf:], ldf,
a[rk*lda:], 1,
1,
a[rk*lda+k+1:], 1)
}
// Update partial column norms.
if rk < lastrk-1 {
for j := k + 1; j < n; j++ {
if vn1[j] == 0 {
continue
}
// The following marked lines follow from the
// analysis in Lapack Working Note 176.
r := math.Abs(a[rk*lda+j]) / vn1[j] // *
temp := math.Max(0, 1-r*r) // *
r = vn1[j] / vn2[j] // *
temp2 := temp * r * r // *
if temp2 < tol3z {
// vn2 is used here as a collection of
// indices into vn2 and also a collection
// of column norms.
vn2[j] = float64(lsticc)
lsticc = j
} else {
vn1[j] *= math.Sqrt(temp) // *
}
}
}
a[rk*lda+k] = akk
}
kb = k
rk = offset + kb
// Apply the block reflector to the rest of the matrix:
//
// A[offset+kb+1:m, kb+1:n] := A[offset+kb+1:m, kb+1:n] - A[offset+kb+1:m, 1:kb]*F[kb+1:n, 1:kb]ᵀ.
if kb < min(n, m-offset) {
bi.Dgemm(blas.NoTrans, blas.Trans,
m-rk, n-kb, kb, -1,
a[rk*lda:], lda,
f[kb*ldf:], ldf,
1,
a[rk*lda+kb:], lda)
}
// Recomputation of difficult columns.
for lsticc >= 0 {
itemp := int(vn2[lsticc])
// NOTE: The computation of vn1[lsticc] relies on the fact that
// Dnrm2 does not fail on vectors with norm below the value of
// sqrt(dlamchS)
v := bi.Dnrm2(m-rk, a[rk*lda+lsticc:], lda)
vn1[lsticc] = v
vn2[lsticc] = v
lsticc = itemp
}
return kb
}