In [1]:
import sys
# Extend the module search path so helper modules under ../../pyutils can be
# imported (presumably where `metrics` lives - confirm).
sys.path.append('../../pyutils')

import numpy as np

import metrics

# Seed NumPy's global RNG so the np.random.randn matrices drawn in the
# cells below are reproducible from a fresh kernel.
np.random.seed(12)

# SVD (Singular Value Decomposition)

Let $M \in \mathbb{R}^{m*n}$.
The singular value decomposition of $M$ is:

$$M = U \Sigma V^T$$

$U$: $m * m$ orthogonal matrix.  
$\Sigma$: $m * n$ rectangular diagonal matrix with non-negative numbers on the diagonal.  
$V$: $n * n$ orthogonal matrix.

The columns of $U$ are the left-singular vectors of $M$, and the eigenvectors of $MM^T$.  
The columns of $V$ are the right-singular vectors of $M$, and the eigenvectors of $M^TM$.  
The diagonal entries of $\Sigma$ are the singular values of $M$ and the square roots of the eigenvalues of $MM^T$ (or $M^TM$).

## Implementation (Golub-Reinsch SVD)

### Resources

- http://people.duke.edu/~hpgavin/SystemID/References/Golub+Reinsch-NM-1970.pdf
- http://www.cs.utexas.edu/users/inderjit/public_papers/HLA_SVD.pdf

1 Use Householder transformations to reduce A to bidiagonal form.  
2 Use QR to find the singular values of the bidiagonal matrix.  
3 Combine results to get SVD of A

### Householder transformation

Let $x \in \mathbb{R}^n$ and let $v \in \mathbb{R}^n$ be a unit vector ($v^Tv = 1$).
$$x \to x - 2v(v^Tx)$$

The Householder matrix is the reflection matrix for that transformation: it reflects $x$ across the hyperplane orthogonal to $v$.
$$P = I - 2vv^T$$

### Bidiagonalization

Transform the matrix $A \in \mathbb{R}^{m*n}$, $m \geq n$ to bidiagonal form.

$$P^TAQ = J^0$$

$J^0$ upper bidiagonal matrix of size $m*n$.  
$P$ orthogonal matrix of size $m*m$.  
$Q$ orthogonal matrix of size $n*n$.    
$P$ and $Q$ are products of sequences of Householder matrices.

In [2]:
def house_vect(x):
    """Return an (unnormalised) Householder vector for ``x``.

    The reflector ``I - 2 v v^T / (v^T v)`` built from the returned ``v``
    maps ``x`` onto a multiple of the first basis vector, i.e. it zeroes
    every component of ``x`` except the first.

    Parameters
    ----------
    x : array_like, shape (k,)
        Non-empty real 1-D vector. It is not modified.

    Returns
    -------
    v : numpy.ndarray of float, shape (k,)
        Copy of ``x`` whose first entry is shifted by ``+/- ||x||``.
        If ``x`` is entirely zero the result is the zero vector.
    """
    # Work on a float copy: writing the shifted entry back into an integer
    # array would silently truncate the norm.
    v = np.array(x, dtype=float)
    # Shift v[0] away from zero (same sign as x[0]) to avoid cancellation.
    # copysign treats 0 as positive; np.sign(0) == 0 would leave v == x, and
    # the resulting reflector would only negate x instead of zeroing its tail.
    v[0] += np.copysign(np.linalg.norm(v), v[0])
    return v
    
def house_mat(v):
    """Build the Householder reflector ``I - 2 v v^T / (v^T v)``.

    Parameters
    ----------
    v : array_like, shape (k,)
        Householder vector; it does not need to be normalised.

    Returns
    -------
    H : numpy.ndarray, shape (k, k)
        Symmetric orthogonal reflection matrix. When ``v`` is the zero
        vector there is nothing to reflect and the identity is returned
        (dividing by ``v @ v == 0`` would otherwise produce NaNs).
    """
    v = np.asarray(v, dtype=float)
    identity = np.eye(len(v))
    squared_norm = v @ v
    if squared_norm == 0:
        return identity
    return identity - 2 * np.outer(v, v) / squared_norm

def bidiagonalize(A):
    """Reduce ``A`` to upper bidiagonal form with Householder reflections.

    For each pivot ``j`` a left reflector zeroes column ``j`` below the
    diagonal, then (while at least two entries remain to the right of the
    diagonal) a right reflector zeroes row ``j`` beyond the superdiagonal.

    Parameters
    ----------
    A : numpy.ndarray, shape (m, n)
        Matrix to reduce. It is not modified.

    Returns
    -------
    P : numpy.ndarray, shape (m, m)
        Product of the left reflectors.
    B : numpy.ndarray, shape (m, n)
        Transformed matrix; entries outside the diagonal and first
        superdiagonal are zero up to round-off (they are not cleaned here).
    Q : numpy.ndarray, shape (n, n)
        Product of the right reflectors.

    The factors satisfy ``A = P @ B @ Q.T``.
    """
    m, n = A.shape
    P = np.eye(m)
    Q = np.eye(n)
    B = A.copy()

    # Only min(m, n) pivots exist. Looping over all n columns would index an
    # empty column once the rows run out, failing for wide matrices (m < n).
    for j in range(min(m, n)):

        # Left reflector: acts on rows j.., built from column j (rows j..).
        H = np.eye(m)
        H[j:, j:] = house_mat(house_vect(B[j:, j]))
        B = H @ B
        P = P @ H.T

        # Right reflector: only needed while row j has at least two entries
        # to the right of the diagonal (one of them is kept as superdiagonal).
        if j < n - 2:
            H = np.eye(n)
            H[j+1:, j+1:] = house_mat(house_vect(B[j, j+1:]))
            B = B @ H
            Q = Q @ H.T

    return P, B, Q

In [3]:
# Tall 5x4 example: bidiagonalize a random matrix and check the factors.
A = np.random.randn(5, 4)
print(A)
U, B, V = bidiagonalize(A)

# Flush round-off noise to exact zeros so the bidiagonal pattern is readable.
noise = np.abs(B) < 1e-10
B[noise] = 0
print(B)

# Every pair below is expected to agree up to round-off:
# U and V orthogonal, A = U B V^T and B = U^T A V.
I_m = np.eye(A.shape[0])
I_n = np.eye(A.shape[1])
checks = [
    (U.T @ U, I_m),
    (U @ U.T, I_m),
    (V.T @ V, I_n),
    (V @ V.T, I_n),
    (U @ B @ V.T, A),
    (U.T @ A @ V, B),
]
for got, expected in checks:
    print(metrics.tdist(got, expected))

[[ 0.47298583 -0.68142588  0.2424395  -1.70073563]
 [ 0.75314283 -1.53472134  0.00512708 -0.12022767]
 [-0.80698188  2.87181939 -0.59782292  0.47245699]
 [ 1.09595612 -1.2151688   1.34235637 -0.12214979]
 [ 1.01251548 -0.91386915 -1.02953021  1.20979645]]
[[-1.91532422 -3.2070338   0.          0.        ]
 [ 0.         -1.94402668  1.01069445  0.        ]
 [ 0.          0.          2.11979381 -0.22319192]
 [ 0.          0.          0.         -1.27923758]
 [ 0.          0.          0.          0.        ]]
9.602048270223544e-16
8.727009981945935e-16
4.015450725665593e-16
4.0594436152518875e-16
2.849816482246977e-15
9.341587213510637e-16


In [4]:
# Square 5x5 example: bidiagonalize a random matrix and check the factors.
A = np.random.randn(5, 5)
print(A)
U, B, V = bidiagonalize(A)

# Flush round-off noise to exact zeros so the bidiagonal pattern is readable.
noise = np.abs(B) < 1e-10
B[noise] = 0
print(B)

# Every pair below is expected to agree up to round-off:
# U and V orthogonal, A = U B V^T and B = U^T A V.
I_m = np.eye(A.shape[0])
I_n = np.eye(A.shape[1])
checks = [
    (U.T @ U, I_m),
    (U @ U.T, I_m),
    (V.T @ V, I_n),
    (V @ V.T, I_n),
    (U @ B @ V.T, A),
    (U.T @ A @ V, B),
]
for got, expected in checks:
    print(metrics.tdist(got, expected))

[[ 0.5018723   0.13884618  0.64076111  0.52733267 -1.15436024]
 [-2.21333348 -1.68175651 -1.78809425 -2.21853495 -0.64743078]
 [-0.52840432 -0.03920917  0.21497595 -0.3843588  -0.25390408]
 [ 0.07325207 -0.99720384 -0.71385629  0.03541635 -0.67794537]
 [-0.57188106 -0.10586232  1.33583134  0.31866529 -0.33759525]]
[[-2.40048867  3.06020607  0.          0.          0.        ]
 [ 0.         -1.90671592 -1.22299727  0.          0.        ]
 [ 0.          0.          0.38335897  1.50406777  0.        ]
 [ 0.          0.          0.          0.55758043 -0.64119558]
 [ 0.          0.          0.          0.          0.12554414]]
5.588720735742759e-16
5.553909732345586e-16
5.298405386522589e-16
5.921083967483366e-16
1.0949609758478446e-15
6.241204495255766e-16


In [5]:
# Very tall 7x3 example: bidiagonalize a random matrix and check the factors.
A = np.random.randn(7, 3)
print(A)
U, B, V = bidiagonalize(A)

# Flush round-off noise to exact zeros so the bidiagonal pattern is readable.
noise = np.abs(B) < 1e-10
B[noise] = 0
print(B)

# Every pair below is expected to agree up to round-off:
# U and V orthogonal, A = U B V^T and B = U^T A V.
I_m = np.eye(A.shape[0])
I_n = np.eye(A.shape[1])
checks = [
    (U.T @ U, I_m),
    (U @ U.T, I_m),
    (V.T @ V, I_n),
    (V @ V.T, I_n),
    (U @ B @ V.T, A),
    (U.T @ A @ V, B),
]
for got, expected in checks:
    print(metrics.tdist(got, expected))

[[-0.58526828 -0.11491994  2.24181779]
 [-3.14741652  0.53513589  0.23249044]
 [ 0.86761195 -1.14821271  2.11434424]
 [ 1.00094276 -0.051415    0.1597877 ]
 [-0.71626359  0.05052283 -0.14333741]
 [ 0.94357539  0.35764423 -0.0834492 ]
 [ 0.6778061   0.55606037  0.22271946]]
[[ 3.72373412  0.53453933  0.        ]
 [ 0.          1.45498399  2.02079949]
 [ 0.          0.         -2.28622672]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]
 [ 0.          0.          0.        ]]
8.282041397872977e-16
8.583257731368093e-16
1.1104540521305056e-16
1.1104540521305056e-16
2.042700056545788e-15
7.12003122376992e-16


The singular values of $J^0$ are the same as those of $A$.  
$$J^0 = G\Sigma H^T$$
$$A = PG \Sigma H^T Q^T$$
$$U = PG$$
$$V = QH$$

### SVD of a bidiagonal matrix

### Eigenvalues and eigenvectors of a symmetric matrix

Let $B = P^{-1}AP$.  
The matrices $A$ and $B$ are said to be similar. They share several properties, one of them is they both have the same eigenvalues.

The QR algorithm:  
- $[Q_k, R_k] \leftarrow qr(A_k)$
- $A_{k+1} \leftarrow R_kQ_k$

$A_k$, $k \to \infty$ converges to a triangular matrix with the eigenvalues on its diagonal.

$$A = QR$$
$$Q^T = Q^{-1}$$
$$R = Q^TA$$
$$A_{k+1} = R_kQ_k = Q_k^TA_kQ_k$$
$A_1$, $A_2$ ..., $A_k$ are similar so they share the same eigenvalues: the diagonal entries of $A_k$

If $A$ is symmetric, $Q_1Q_2$...$Q_k$ is a matrix whose columns are the eigenvectors of $A$

In [6]:
def qr_algorithm(A, max_iters=100, prec=1e-6):
    """Approximate eigenvalues/eigenvectors with the unshifted QR iteration.

    Repeats ``Q, R = qr(A_k); A_{k+1} = R @ Q`` and accumulates the ``Q``
    factors. Iteration stops after ``max_iters`` steps, or as soon as the
    Frobenius norm of the strictly lower-triangular part of ``A_k`` drops
    below ``prec``.

    Parameters
    ----------
    A : numpy.ndarray, shape (n, n)
        Input matrix; it is copied, not modified.
    max_iters : int
        Upper bound on the number of QR steps.
    prec : float
        Convergence threshold on the below-diagonal norm.

    Returns
    -------
    vals : numpy.ndarray, shape (n,)
        Diagonal of the final iterate.
    vects : numpy.ndarray, shape (n, n)
        Product of all ``Q`` factors.
    """
    iterate = A.copy()
    basis = np.eye(A.shape[0])

    steps_done = 0
    while steps_done < max_iters:
        steps_done += 1

        q_factor, r_factor = np.linalg.qr(iterate)
        # R @ Q equals Q^T (Q R) Q, a similarity transform of the iterate.
        iterate = r_factor @ q_factor
        basis = basis @ q_factor

        # Whatever is left after removing the upper triangle is the
        # strictly-lower part; stop once it is negligible.
        below_diagonal = iterate - np.triu(iterate)
        if np.linalg.norm(below_diagonal) < prec:
            break

    return np.diag(iterate), basis

def test_eig(A, fn):
    """Print a comparison of ``fn(A)`` against ``np.linalg.eigh(A)``.

    Parameters
    ----------
    A : numpy.ndarray
        Matrix passed to both solvers (``eigh`` expects it to be symmetric).
    fn : callable
        Solver returning ``(vals, vects)`` with eigenvectors as columns.
        Assumed to return eigenvalues in descending order so they line up
        with the reversed ``eigh`` output - confirm for new solvers.

    Prints both eigenvalue arrays, then ``metrics.tdist`` of the eigenvalues
    and of the eigenvectors (presumably a distance between arrays - see the
    ``metrics`` module). The ``vects`` array returned by ``fn`` is
    sign-normalised in place. Returns nothing.
    """
    vals, vects = fn(A)

    # eigh reports eigenvalues in ascending order; flip values and columns.
    ref_vals, ref_vects = np.linalg.eigh(A)
    vals_sol = ref_vals[::-1]
    vects_sol = ref_vects[:, ::-1]

    # Eigenvectors are only defined up to sign: make the first component of
    # every column non-negative in both sets so they can be compared.
    n_cols = vects.shape[1]
    for basis in (vects, vects_sol):
        for col in range(n_cols):
            if basis[0, col] < 0:
                basis[:, col] = -basis[:, col]

    print(vals)
    print(vals_sol)
    print(metrics.tdist(vals, vals_sol))
    print(metrics.tdist(vects, vects_sol))

In [7]:
# Compare qr_algorithm with eigh on the two symmetric Gram matrices of a
# random 4x3 matrix.
A = np.random.randn(4, 3)
AAT, ATA = A @ A.T, A.T @ A

for gram in (AAT, ATA):
    test_eig(gram, qr_algorithm)

[ 1.32671370e+01  2.62818435e+00  8.03618898e-02 -1.97806793e-16]
[ 1.32671370e+01  2.62818435e+00  8.03618898e-02 -1.10876012e-15]
6.283949477084132e-14
9.090398311204039e-08
[13.26713699  2.62818435  0.08036189]
[13.26713699  2.62818435  0.08036189]
1.0802243336484212e-13
1.1876547222095116e-07


In [8]:
def qr_algorithm_shift(A, max_iters=100, prec=1e-6, shift=0.01):
    """Approximate eigenvalues/eigenvectors with a constant-shift QR iteration.

    Each step factors ``A_k - shift * I = Q R`` and forms
    ``A_{k+1} = R @ Q + shift * I``, accumulating the ``Q`` factors.
    Iteration stops after ``max_iters`` steps, or as soon as the Frobenius
    norm of the strictly lower-triangular part of ``A_k`` drops below
    ``prec``.

    Parameters
    ----------
    A : numpy.ndarray, shape (n, n)
        Input matrix; it is copied, not modified.
    max_iters : int
        Upper bound on the number of QR steps.
    prec : float
        Convergence threshold on the below-diagonal norm.
    shift : float
        Constant shift applied at every step. The default (0.01) is the
        value that used to be hard-coded; ``shift=0`` gives the unshifted
        QR iteration.

    Returns
    -------
    vals : numpy.ndarray, shape (n,)
        Diagonal of the final iterate.
    vects : numpy.ndarray, shape (n, n)
        Product of all ``Q`` factors.
    """
    Ak = A.copy()
    Qk = np.eye(A.shape[0])
    # The shift is constant, so build shift * I once outside the loop.
    shift_mat = shift * np.eye(A.shape[0])

    for _ in range(max_iters):

        Q, R = np.linalg.qr(Ak - shift_mat)
        # Adding the shift back keeps A_{k+1} similar to A_k.
        Ak = R @ Q + shift_mat
        Qk = Qk @ Q

        if np.linalg.norm(Ak - np.triu(Ak)) < prec:
            break

    vals = np.diag(Ak)
    vects = Qk
    return vals, vects

In [9]:
# Compare qr_algorithm_shift with eigh on the two symmetric Gram matrices of
# a random 4x3 matrix.
A = np.random.randn(4, 3)
AAT, ATA = A @ A.T, A.T @ A

for gram in (AAT, ATA):
    test_eig(gram, qr_algorithm_shift)

[1.07393364e+01 3.40381620e+00 3.90479548e-01 1.75207071e-16]
[1.07393364e+01 3.40381620e+00 3.90479548e-01 4.47097334e-16]
1.1809819656721822e-13
1.5155503025945444e-07
[10.73933638  3.4038162   0.39047955]
[10.73933638  3.4038162   0.39047955]
1.391134812859608e-13
1.624867961266813e-07


## SVD computing $AA^T$

$$A v_i = \sigma_i u_i$$
$$A^T u_i = \sigma_i v_i$$

We can compute the SVD of $A$ naively:
- Find the left singular vectors and the singular values by applying the QR algorithm on $AA^T$
- Find the right singular vectors by computing $v_i = \frac{A^T u_i}{\sigma_i}$

In [10]:
def svd_naive(A):
    """Naive SVD of ``A`` via the QR eigen-iteration on ``A @ A.T``.

    The left singular vectors and squared singular values come from
    ``qr_algorithm(A @ A.T)``; each right singular vector is then recovered
    as ``v_i = A^T u_i / sigma_i``.

    Parameters
    ----------
    A : numpy.ndarray, shape (m, n)
        Input matrix. The code takes the first ``n`` eigenpairs of the
        ``m x m`` matrix ``A @ A.T``, so it requires ``m >= n``.

    Returns
    -------
    U : numpy.ndarray, shape (m, m)
        Eigenvector matrix returned by ``qr_algorithm``.
    S : numpy.ndarray, shape (n,)
        Singular values (square roots of the first ``n`` eigenvalues).
    VT : numpy.ndarray, shape (n, n)
        Row ``i`` is ``v_i^T``. Rows belonging to a zero singular value are
        left at zero: ``v_i`` cannot be recovered from ``A^T u_i / 0``, and a
        zero row keeps ``U @ diag(S) @ VT`` finite and equal to ``A``. In
        that (rank-deficient) case ``VT`` is not orthogonal.
    """
    n = A.shape[1]
    lvals, lvects = qr_algorithm(A @ A.T)
    U = lvects
    # Eigenvalues of A A^T are >= 0 in exact arithmetic, but round-off can
    # return tiny negatives (e.g. -1e-16); clip so sqrt does not yield NaN.
    S = np.sqrt(np.clip(lvals[:n], 0, None))

    VT = np.zeros((n, n))
    for i in range(n):
        # Skip zero singular values instead of dividing by zero.
        if S[i] > 0:
            VT[i] = (A.T @ U[:, i]) / S[i]

    return U, S, VT

In [11]:
# Check svd_naive on a random 4x3 matrix against NumPy's reference SVD.
A = np.random.randn(4, 3)
U_sol, S_sol, VT_sol = np.linalg.svd(A)

U, S, VT = svd_naive(A)

# Embed the singular values in an m x n matrix for the reconstruction check.
k = len(S)
S_mat = np.zeros(A.shape)
S_mat[:k, :k] = np.diag(S)

# Orthogonality of U and V, singular values vs. reference, then A = U S V^T.
I_m = np.eye(A.shape[0])
I_n = np.eye(A.shape[1])
checks = [
    (U @ U.T, I_m),
    (U.T @ U, I_m),
    (VT @ VT.T, I_n),
    (VT.T @ VT, I_n),
    (S, S_sol),
    (U @ S_mat @ VT, A),
]
for got, expected in checks:
    print(metrics.tdist(got, expected))

2.3467323954533475e-15
2.4014112774011663e-15
7.639478459177362e-07
7.639478459163297e-07
2.2371996705809764e-13
4.791998321464427e-15


##  Naive SVD with bidiagonalization

In [12]:
def svd_naive_bidioagonal(A):
    """SVD of ``A`` computed through its bidiagonal form.

    ``bidiagonalize`` gives ``A = P J Q^T`` and ``svd_naive`` gives
    ``J = G diag(S) H^T``; combining them yields ``A = (P G) diag(S) (H^T Q^T)``.

    Parameters
    ----------
    A : numpy.ndarray, shape (m, n)
        Input matrix.

    Returns
    -------
    U, S, VT
        ``P @ G``, the singular values from ``svd_naive``, and ``H^T @ Q^T``.
    """
    left, bidiag, right = bidiagonalize(A)
    inner_u, sing_vals, inner_vt = svd_naive(bidiag)
    # Fold the bidiagonalization reflectors into the inner singular vectors.
    return left @ inner_u, sing_vals, inner_vt @ right.T
    
# Check svd_naive_bidioagonal on a random 4x3 matrix against NumPy's SVD.
A = np.random.randn(4, 3)
U_sol, S_sol, VT_sol = np.linalg.svd(A)

U, S, VT = svd_naive_bidioagonal(A)

# Embed the singular values in an m x n matrix for the reconstruction check.
k = len(S)
S_mat = np.zeros(A.shape)
S_mat[:k, :k] = np.diag(S)

# Orthogonality of U and V, singular values vs. reference, then A = U S V^T.
I_m = np.eye(A.shape[0])
I_n = np.eye(A.shape[1])
checks = [
    (U @ U.T, I_m),
    (U.T @ U, I_m),
    (VT @ VT.T, I_n),
    (VT.T @ VT, I_n),
    (S, S_sol),
    (U @ S_mat @ VT, A),
]
for got, expected in checks:
    print(metrics.tdist(got, expected))

3.149284723413304e-15
3.1003034947406494e-15
1.692369970107095e-07
1.6923699700461946e-07
3.488362438322582e-14
9.483132718540592e-15


## SVD of bidiagonal matrix

Let $J_0$ of size $m * n$ upper bidiagonal matrix.  
$J$ is iteratively diagonalized to $\Sigma$
$$J_{i+1} = S_i^T J_i T_i$$
$S_i$ and $T_i$ are orthogonal matrices that represent Givens rotations.

Let $M_i= J_i^TJ_i$ tridiagonal matrix
$$M_{i+1} = T_i^T M_i T_i$$

The transformation $M_i \to M_{i+1}$ is actually a $QR$ transformation with shift $s$.
$$M_i - sI = T_sR_s$$
$$M_{i+1} = R_sT_s + sI$$

Maybe:
- $J_k \to \Sigma$ (diagonal matrix), singular values of $J_0$
- SVD of diagonal matrix is $IDI$
- $S_1$, $S_2$, ..., $T_1$, $T_2$, ... orthogonal matrices
- $U = S_1 S_2$ ...
- $V = T_1 T_2$ ...