In [1]:
import sys
# Extend the module search path with the shared helper directory (relative to
# the notebook's working directory). This must run before `import metrics`
# below -- presumably that is where the `metrics` module lives.
sys.path.append('../../pyutils')

import numpy as np

import metrics

# Seed NumPy's global random generator so the random matrices drawn in the
# following cells are the same on every Restart & Run All.
np.random.seed(12)

# QR Factorisation

Let $A$ be a matrix of size $m \times n$, with $m \geq n$ and $\dim(\text{Col } A) = n$ (linearly independent columns).  
$A$ can be decomposed as $A = QR$, where $Q$ is a matrix of size $m \times n$ with orthonormal columns ($Q^TQ=I$), and $R$ is an upper triangular matrix of size $n \times n$.  
If $A$ is square, $Q$ is an orthogonal matrix ($QQ^T=Q^TQ=I$).  
The decomposition is unique if all diagonal entries of $R$ are required to be positive.

In [2]:
# Draw a tall random matrix and factor it with NumPy's reference routine.
A = np.random.randn(4, 3)
Q, R = np.linalg.qr(A)

# One shape per line: A, then Q, then R.
print(A.shape, Q.shape, R.shape, sep="\n")

(4, 3)
(4, 3)
(3, 3)


In [3]:
# Compare Q^T Q with the identity and Q R with A (metrics.tdist is a project
# helper; the tiny printed values suggest it measures a distance), then ask
# metrics.is_utri about R.
gram_gap = metrics.tdist(Q.T @ Q, np.eye(3))
recon_gap = metrics.tdist(Q @ R, A)
print(gram_gap, recon_gap, metrics.is_utri(R), sep="\n")

# Show the two factors themselves.
print(Q, R, sep="\n")

4.158610537008119e-16
8.675560052622147e-16
True
[[-0.14031056  0.82546298 -0.25514143]
 [ 0.50452075 -0.38925408 -0.58421823]
 [-0.00152094  0.18042135 -0.70849607]
 [-0.85192104 -0.36679731 -0.3026966 ]]
[[-3.37099247  0.98506821 -1.20958426]
 [ 0.         -0.65806756  0.47862866]
 [ 0.          0.          1.26348819]]


## QR Implementation with Gram-Schmidt process

In [4]:
def qr_gram(a):
    """Compute the reduced QR factorisation of ``a`` with Gram-Schmidt.

    Every column is first orthogonalised against the columns processed before
    it (the projection coefficient is computed from the partially updated
    column, i.e. the "modified" Gram-Schmidt ordering). The columns are only
    normalised at the end, and the rows of ``r`` are rescaled accordingly so
    that ``a = q @ r`` still holds.

    Parameters
    ----------
    a : array_like, shape (m, n)
        Matrix to factorise. Its columns are assumed to be linearly
        independent; otherwise a (numerically) zero column shows up and the
        divisions below yield NaN/inf. Integer and boolean input is promoted
        to float64, floating-point input keeps its dtype. ``a`` itself is
        never modified.

    Returns
    -------
    q : ndarray, shape (m, n)
        Matrix with orthonormal columns.
    r : ndarray, shape (n, n)
        Upper triangular matrix with positive diagonal such that
        ``a = q @ r``.
    """
    a = np.asarray(a)
    # The columns are updated in place with fractional values, so the working
    # copy has to be floating point: with an integer copy ``nex -= ...`` raises
    # a casting error.
    work_dtype = a.dtype if np.issubdtype(a.dtype, np.inexact) else np.float64
    q = a.astype(work_dtype)  # astype returns a fresh copy
    n_cols = a.shape[1]
    r = np.eye(n_cols)

    for j in range(n_cols):

        nex = q[:, j]  # view: updating nex updates q in place
        for k in range(j):
            prev = q[:, k]
            # prev is orthogonal but not yet normalised, hence the division.
            coeff = np.dot(prev, nex) / np.dot(prev, prev)
            r[k, j] = coeff
            nex -= prev * coeff

    # Normalise q's columns and scale r's rows to maintain a = qr.
    for j in range(n_cols):
        n = np.linalg.norm(q[:, j])
        q[:, j] /= n
        r[j] *= n

    return q, r

In [5]:
# Factor the same matrix A with the Gram-Schmidt implementation.
Q, R = qr_gram(A)

# One shape per line: A, then Q, then R.
print(A.shape, Q.shape, R.shape, sep="\n")

(4, 3)
(4, 3)
(3, 3)


In [6]:
# Same checks as for the NumPy factorisation: Q^T Q against the identity,
# Q R against A, and metrics.is_utri on R.
gram_gap = metrics.tdist(Q.T @ Q, np.eye(3))
recon_gap = metrics.tdist(Q @ R, A)
print(gram_gap, recon_gap, metrics.is_utri(R), sep="\n")

# Show the two factors themselves.
print(Q, R, sep="\n")

4.823698467508981e-16
2.4196749845665633e-16
True
[[ 0.14031056 -0.82546298 -0.25514143]
 [-0.50452075  0.38925408 -0.58421823]
 [ 0.00152094 -0.18042135 -0.70849607]
 [ 0.85192104  0.36679731 -0.3026966 ]]
[[ 3.37099247 -0.98506821  1.20958426]
 [ 0.          0.65806756 -0.47862866]
 [ 0.          0.          1.26348819]]


## QR decomposition with householder

### Householder transformation

Let $x \in \mathbb{R}^n$ and let $v \in \mathbb{R}^n$ be a unit vector. The Householder transformation is:
$$x \to x - 2v(v^Tx)$$

The Householder matrix is the matrix of that transformation; it is a reflection across the hyperplane orthogonal to $v$.
$$P = I - 2vv^T$$
The householder matrix is orthogonal: $P^{-1}=P^T$

The goal is to find the vector $v$ that transforms a vector $x$ into a vector $\alpha e_1$, i.e. sets all entries below the first one to 0.

$$u_1 = x_1 + \text{sign}(x_1)\|x\|$$
$u_2 = x_2$, $u_3 = x_3$, ..., and $v = u / \|u\|$.

The transformation is applied to each column in turn. Each time, we only take a submatrix of $A$ so that the entry to keep is at the top, and the Householder matrix is extended on the top-left by the identity matrix, so that every entry to the left and above stays the same.

After $n$ iterations, $A$ is transformed into an upper triangular matrix $R$.  
$$Q = P_1^T P_2^T \text{...} P_{n}^T$$
$$A = QR$$
Q is the product of all transformations, so it's an orthogonal matrix

In [7]:
def house_vect(x):
    """Return the (unnormalised) Householder vector that maps ``x`` onto e_1.

    The result ``v`` equals ``x`` except for its first entry, which becomes
    ``x[0] + sign(x[0]) * ||x||``. Reflecting ``x`` with
    ``I - 2 v v^T / (v^T v)`` then gives ``-sign(x[0]) * ||x|| * e_1``.

    Parameters
    ----------
    x : array_like, shape (k,)
        Non-empty vector to reflect. It is not modified. Integer and boolean
        input is promoted to float64 so the first entry is not truncated.

    Returns
    -------
    v : ndarray, shape (k,)
        The Householder vector (all zeros when ``x`` is all zeros).
    """
    x = np.asarray(x)
    v = x.astype(x.dtype if np.issubdtype(x.dtype, np.inexact) else np.float64)
    # np.sign(0) is 0, which would leave v == x and reflect x onto -x instead
    # of onto a multiple of e_1; treat a zero pivot as positive.
    sign = np.sign(x[0]) if x[0] != 0 else 1.0
    v[0] = x[0] + sign * np.linalg.norm(x)
    return v
    
def house_mat(v):
    """Build the Householder reflection matrix ``I - 2 v v^T / (v^T v)``.

    Parameters
    ----------
    v : array_like, shape (k,)
        Householder vector; it does not need to be normalised.

    Returns
    -------
    ndarray, shape (k, k)
        The reflection matrix. For an all-zero ``v`` there is nothing to
        reflect, so the identity is returned instead of a 0/0 NaN matrix.
    """
    v = np.asarray(v)
    norm_sq = v @ v
    if norm_sq == 0:
        return np.eye(len(v))
    return np.eye(len(v)) - 2 * np.outer(v, v) / norm_sq

def qr_householder(A):
    """Compute a full QR factorisation of ``A`` with Householder reflections.

    For each of the first ``min(m, n)`` columns, a reflection built from the
    part of the column at and below the diagonal is applied to the working
    matrix, and its transpose is accumulated into ``Q``.

    Parameters
    ----------
    A : array_like, shape (m, n)
        Matrix to factorise; it is not modified. Integer and boolean input is
        promoted to floating point.

    Returns
    -------
    Q : ndarray, shape (m, m)
        Product of the transposed reflections.
    R : ndarray, shape (m, n)
        The reflected matrix, upper triangular up to rounding, with
        ``A = Q @ R``.
    """
    A = np.asarray(A)
    m, n = A.shape

    Q = np.eye(m)
    # Work in floating point from the start: with an integer copy the first
    # Householder vector would be built from (and truncated to) integers.
    R = A.astype(np.result_type(A.dtype, np.float64))

    for j in range(min(m, n)):

        # Column j from the diagonal downwards is the vector to map onto e_1.
        v = house_vect(R[j:, j])
        # Embed the (m-j) x (m-j) reflector in an m x m identity so that the
        # first j rows and columns are left untouched.
        P = np.eye(m)
        P[j:, j:] = house_mat(v)
        R = P @ R
        Q = Q @ P.T

    return Q, R

In [8]:
# Square case: factor a random 3x3 matrix with the Householder implementation.
A = np.random.randn(3, 3)
Q, R = qr_householder(A)
I_m = np.eye(A.shape[0])

print(
    metrics.tdist(A, Q @ R),         # A against Q R
    metrics.tdist(Q @ Q.T, I_m),     # Q Q^T against the identity
    metrics.tdist(Q.T @ Q, I_m),     # Q^T Q against the identity
    metrics.tdist(R, np.triu(R)),    # R against its upper triangular part
    sep="\n",
)

3.3335590258932494e-16
2.253716146295488e-16
1.4086042880055593e-16
6.158017547833885e-17


If $m > n$, $R$ is upper triangular and its last $m-n$ rows are all 0.

In [9]:
# Tall case (m > n): factor a random 7x3 matrix with the Householder implementation.
A = np.random.randn(7, 3)
Q, R = qr_householder(A)
I_m = np.eye(A.shape[0])

print(
    metrics.tdist(A, Q @ R),         # A against Q R
    metrics.tdist(Q @ Q.T, I_m),     # Q Q^T against the identity
    metrics.tdist(Q.T @ Q, I_m),     # Q^T Q against the identity
    metrics.tdist(R, np.triu(R)),    # R against its upper triangular part
    sep="\n",
)

9.471018274707082e-16
8.921940575121625e-16
8.34543037121412e-16
2.9973870621896996e-16


If $m < n$, we can stop after $m$ iterations: there are no more entries below the diagonal left to set to $0$.

In [10]:
# Wide case (m < n): factor a random 3x7 matrix with the Householder implementation.
A = np.random.randn(3, 7)
Q, R = qr_householder(A)
I_m = np.eye(A.shape[0])

print(
    metrics.tdist(A, Q @ R),         # A against Q R
    metrics.tdist(Q @ Q.T, I_m),     # Q Q^T against the identity
    metrics.tdist(Q.T @ Q, I_m),     # Q^T Q against the identity
    metrics.tdist(R, np.triu(R)),    # R against its upper triangular part
    sep="\n",
)

2.567203199922655e-15
7.376703283992538e-16
7.483555336676825e-16
2.0404170400622705e-16


## QR decomposition with Givens rotations

$G(i, j, \theta)$ is a matrix representing a givens rotation.  
The non-zeros entries are:
- $g_{kk} = 1$ for $k \neq i, j$
- $g_{kk}  = \cos(\theta)$ for $k = i,j$
- $g_{ij} = - \sin(\theta)$
- $g_{ji} =  \sin(\theta)$

Givens rotations can be used to put a 0 in a specific entry of a vector or a matrix.

$G$ matrix is orthogonal

$G(i, j, \theta)$ applied to a vector $x$ will only change the entries $x_i$ and $x_j$.
We can set the entry $x_j$ to $0$ by solving this system of equations:

$$
\begin{bmatrix} c & -s \\ s & c \end{bmatrix}^T  \left[ \begin{array}{c} x_i \\ x_j \end{array} \right] = \left[ \begin{array}{c} \rho \\ 0 \end{array} \right]
$$

The solutions are:

$$c = \frac{x_i}{\sqrt{x_i^2 + x_j^2}}, \quad s = \frac{x_j}{\sqrt{x_i^2 + x_j^2}}$$

In practice these formulas might overflow, so other, more stable formulations are used.

We can apply one Givens rotation for each entry we want to set to $0$ in order to transform $A$ into $R$.
$$R = G_{t}^T G_{t-1}^T \text{...} G_2^T G_1^T A$$
$$Q = G_1 G_2 \text{...} G_{t-1} G_{t}$$

In [11]:
def get_angles(a, b):
    """Return ``(c, s)`` for the Givens rotation that zeroes ``b`` against ``a``.

    The pair satisfies ``c**2 + s**2 == 1`` and ``-s * a + c * b == 0`` (up to
    rounding), so that ``[[c, -s], [s, c]].T @ [a, b]`` has a zero second
    entry. The ratio used is always at most 1 in magnitude, which avoids
    squaring a large number.

    Parameters
    ----------
    a : scalar
        Entry that is kept (first component of the pair).
    b : scalar
        Entry that must become zero (second component of the pair).

    Returns
    -------
    c, s : scalars
        Cosine and sine of the rotation; ``(1.0, 0.0)`` when ``b`` is zero.
    """
    # Only an exact zero needs no rotation. An absolute threshold such as
    # 1e-6 is not scale-invariant: for a matrix with small entries it would
    # skip every rotation and leave R with non-zero sub-diagonal entries.
    if b == 0:
        return 1.0, 0.0

    if np.abs(b) > np.abs(a):
        # |a / b| < 1 here, and b != 0 so the division is safe.
        ratio = a / b
        s = 1 / np.sqrt(1 + ratio**2)
        return s * ratio, s

    # |b| <= |a| and b != 0 imply a != 0, so the division is safe.
    ratio = b / a
    c = 1 / np.sqrt(1 + ratio**2)
    return c, c * ratio

def given_mat(n, i, j, c, s):
    """Build the n x n Givens rotation acting on coordinates ``i`` and ``j``.

    The result is the identity except for four entries: ``c`` at positions
    (i, i) and (j, j), ``-s`` at (i, j) and ``s`` at (j, i).
    """
    rot = np.identity(n)
    # Both diagonal entries of the 2x2 rotation block hold the cosine.
    rot[[i, j], [i, j]] = c
    # Off-diagonal entries: -sine above, +sine below (for i < j).
    rot[i, j], rot[j, i] = -s, s
    return rot

def qr_givens(A):
    """Compute a full QR factorisation of ``A`` with Givens rotations.

    Columns are processed left to right. Within a column, the entries below
    the diagonal are eliminated from the bottom row upwards, each one by a
    rotation acting on that row and the row just above it. Returns ``(Q, R)``
    where ``Q`` is m x m (product of the rotations) and ``R`` is m x n.
    """
    rows, cols = A.shape

    R = A.copy()
    Q = np.eye(rows)

    for col in range(cols):
        # Visit rows rows-1, rows-2, ..., col+1 (nothing to do once col + 1 >= rows).
        for low in reversed(range(col + 1, rows)):
            up = low - 1
            c, s = get_angles(R[up, col], R[low, col])
            G = given_mat(rows, up, low, c, s)
            # Apply the rotation to R and accumulate it into Q.
            R = G.T @ R
            Q = Q @ G

    return Q, R

In [12]:
# Square case: factor a random 3x3 matrix with the Givens implementation.
A = np.random.randn(3, 3)
Q, R = qr_givens(A)
I_m = np.eye(A.shape[0])

print(
    metrics.tdist(A, Q @ R),         # A against Q R
    metrics.tdist(Q @ Q.T, I_m),     # Q Q^T against the identity
    metrics.tdist(Q.T @ Q, I_m),     # Q^T Q against the identity
    metrics.tdist(R, np.triu(R)),    # R against its upper triangular part
    sep="\n",
)

4.644396126208125e-16
8.235826278468719e-17
8.29625407660349e-17
1.0343301410921444e-16


In [13]:
# Tall case (m > n): factor a random 7x3 matrix with the Givens implementation.
A = np.random.randn(7, 3)
Q, R = qr_givens(A)
I_m = np.eye(A.shape[0])

print(
    metrics.tdist(A, Q @ R),         # A against Q R
    metrics.tdist(Q @ Q.T, I_m),     # Q Q^T against the identity
    metrics.tdist(Q.T @ Q, I_m),     # Q^T Q against the identity
    metrics.tdist(R, np.triu(R)),    # R against its upper triangular part
    sep="\n",
)

1.2600542234970832e-15
7.300404742763763e-16
7.00736699883665e-16
3.443103986676303e-16


In [14]:
# Wide case (m < n): factor a random 3x7 matrix with the Givens implementation.
A = np.random.randn(3, 7)
Q, R = qr_givens(A)
I_m = np.eye(A.shape[0])

print(
    metrics.tdist(A, Q @ R),         # A against Q R
    metrics.tdist(Q @ Q.T, I_m),     # Q Q^T against the identity
    metrics.tdist(Q.T @ Q, I_m),     # Q^T Q against the identity
    metrics.tdist(R, np.triu(R)),    # R against its upper triangular part
    sep="\n",
)

1.3274160260144074e-15
4.712711537270345e-16
4.746853719176824e-16
8.059614808417886e-17
