# QR Factorization and Least Squares #
    - Matt Robinson
    
NOTE: a lot of these notes are taken from fast.ai's course notes, http://nbviewer.jupyter.org/github/fastai/numerical-linear-algebra/blob/master/nbs/8.%20Implementing%20QR%20Factorization.ipynb#Gram-Schmidt

In [43]:
import numpy as np
# Print arrays in fixed-point notation (no scientific notation for very
# small values) with 4 digits of precision.
np.set_printoptions(suppress=True, precision=4)

In [177]:
def classical_gram_schmidt_QR(A):
    '''
    Reduced QR factorization via classical (unmodified)
    Gram-Schmidt orthogonalization.

    For an m x n array A, returns (Q, R) where Q is m x n and
    R is n x n and upper triangular, with A = Q R.

    Each column of A is projected onto every previously computed
    column of Q; the projection coefficients fill the part of the
    matching column of R above the diagonal, and the norm of what
    is left over becomes the diagonal entry. Because column k of A
    is built only from the first k columns of Q, R is triangular.
    (See https://en.wikipedia.org/wiki/QR_decomposition)

    NOTE: this algorithm is numerically UNSTABLE. A column that is
    linearly dependent on the earlier ones leaves a zero residual,
    and the normalization then divides by zero.
    '''

    num_rows, num_cols = A.shape

    Q = np.zeros((num_rows, num_cols), dtype=np.float64)
    R = np.zeros((num_cols, num_cols), dtype=np.float64)

    for col in range(num_cols):
        # start from the original column and peel off its components
        # along each of the orthonormal directions found so far
        residual = A[:, col]
        for prev in range(col):
            # the coefficient is always taken against the ORIGINAL
            # column of A (this is what makes the method "classical")
            R[prev, col] = np.dot(Q[:, prev], A[:, col])
            residual = residual - (R[prev, col] * Q[:, prev])

        # the length of the remainder is the diagonal entry of R,
        # and its direction is the next column of Q
        R[col, col] = np.linalg.norm(residual)
        Q[:, col] = residual / R[col, col]

    return Q, R

In [45]:
def modified_gram_schmidt_QR(A):
    '''
    Reduced QR factorization using modified
    Gram-Schmidt orthogonalization.

    Classical Gram-Schmidt (CGS) takes a vector
    and makes it orthogonal to all previous vectors.

    In modified Gram-Schmidt (MGS), all forthcoming vectors
    are modified to be orthogonal to the current vector.

    They are mathematically equivalent
    (https://www.math.uci.edu/~ttrogdon/105A/html/Lecture23.html)

    The advantage is that MGS keeps the computed vectors much closer
    to orthogonal than CGS in the presence of floating point errors.

    Parameters
    ----------
    A : real m x n array-like. It is not modified; the work is done
        on a float64 copy, so integer input is handled correctly.

    Returns
    -------
    Q : m x n float64 array whose n columns are orthonormal
    R : n x n float64 upper triangular array, with A = Q R

    NOTE: a zero or linearly dependent column gives a zero diagonal
    entry in R, and the normalization then divides by zero.
    '''

    # Always work in float64. A plain A.copy() keeps an integer dtype,
    # and writing the (fractional) residuals back into an integer array
    # silently truncates them, which ruins the factorization.
    V = np.array(A, dtype=np.float64)
    m, n = V.shape

    Q = np.zeros([m,n], dtype=np.float64)
    R = np.zeros([n,n], dtype=np.float64)

    # loop over columns of V
    for i in range(n):

        # set qi = normalized ith column of V
        R[i,i] = np.linalg.norm(V[:,i])
        Q[:,i] = V[:,i] / R[i,i]

        # remove the qi component from every LATER column only;
        # column i itself is finished, so start at i+1
        for j in range(i+1, n):
            R[i,j] = np.dot(Q[:,i],V[:,j])
            V[:,j] = V[:,j] - R[i,j]*Q[:,i]

    return Q, R

In [98]:
def householder_QR(A):
    '''
    Uses Householder reflections to construct a full QR factorization.

    Zeros are systematically introduced under the diagonal
    using Householder reflectors. The successive multiplication
    by Householder matrices gives the upper triangular matrix R.

    The process looks as follows:

    H_n H_n-1 ... H_2 H_1 A = R

    Each H_i is an orthogonal matrix.
    Therefore, so is the product H_n H_n-1 ... H_2 H_1
    This product is simply Q^T.

    Parameters
    ----------
    A : real m x n array-like (not modified)

    Returns
    -------
    Q : m x m orthogonal float64 array
    R : m x n float64 array, upper triangular (upper trapezoidal
        when m != n), with A = Q R
    '''
    R = np.array(A, dtype=np.float64)
    m, n = R.shape

    QT = np.eye(m, dtype=np.float64)

    # loop over the columns that have a diagonal entry
    # (only min(m, n) of them exist, so wide matrices work too)
    for k in range(min(m, n)):

        ak = R[:,k] # note that R is initially a copy of A

        tail_norm = np.linalg.norm(ak[k:])
        if tail_norm == 0:
            # the column is already zero on and below the diagonal;
            # there is nothing to reflect (and u would be the zero
            # vector, which cannot be normalized)
            continue

        # Give the norm the same sign as the pivot so the addition
        # below can never cancel. np.sign() must not be used here:
        # it returns 0 for a zero pivot, which yields a reflector
        # that does not zero out the entries below the diagonal.
        sign = 1.0 if ak[k] >= 0 else -1.0

        # create u where the first k elements are 0
        u = np.zeros(m)
        u[k] = sign*tail_norm + ak[k]
        u[k+1:] = ak[k+1:]

        # normalize u and call it v
        v = u/np.linalg.norm(u)
        v = v.reshape(-1,1)

        # construct the Householder reflector
        H = (np.eye(m) - (2*(v @ v.T)))

        QT = H @ QT
        R = H @ R

    #return QT.T[:,:n], R[:n,:]
    return QT.T, R
             

In [174]:
def givens_QR(A):
    '''
    Uses Givens rotations to construct a full QR factorization.

    Every nonzero entry below the diagonal is eliminated by a plane
    rotation acting on two rows: the row of the entry and the row of
    the diagonal element in the same column. Accumulating the
    rotations gives Q^T, and applying them to A gives R.

    Parameters
    ----------
    A : real m x n array-like (not modified)

    Returns
    -------
    Q : m x m orthogonal float64 array
    R : m x n float64 array, upper triangular (upper trapezoidal
        when m != n), with A = Q R
    '''

    R = np.array(A, dtype=np.float64)
    m, n = R.shape

    QT = np.eye(m, dtype=np.float64)

    # loop over the strictly lower triangular elements (row by row)
    i_list, j_list = np.tril_indices(n=m,k=-1,m=n)
    for i, j in zip(i_list, j_list):

        # skip over elements that are already zero
        if R[i,j] == 0:
            continue

        # note that the diagonal element in the given column is R[j,j].
        # hypot avoids the overflow/underflow of squaring very large or
        # very small entries, which would give a zero or NaN rotation.
        r = np.hypot(R[j,j], R[i,j])
        cos = R[j,j]/r
        sin = R[i,j]/r

        # The rotation only mixes rows j and i, so update just those
        # two rows instead of multiplying by a full m x m matrix.
        G = np.array([[cos, sin],
                      [-1*sin, cos]])
        rows = [j, i]
        QT[rows,:] = G @ QT[rows,:]
        R[rows,:] = G @ R[rows,:]

        # the rotation was built to annihilate this entry; store an
        # exact zero rather than the rounding residue
        R[i,j] = 0.0

    #return QT.T[:,:n], R[:n,:]
    return QT.T, R

In [175]:
import unittest

class QR_Test(unittest.TestCase):
    '''
    Tests three things for each test matrix and each factorization:

    1. Test equality of A = QR for each test matrix
    2. Test Orthogonality of Q. i.e., Q^T*Q = I and Q*Q^T = I
    3. Test that R is upper triangular using np.triu() method
    '''

    # make random matrices (square m x m, square n x n, and tall m x n)
    m, n = 10, 5

    # fixed seed so that a failing run can be reproduced exactly
    _rng = np.random.RandomState(0)

    A = _rng.rand(m,m)
    B = _rng.rand(n,n)
    C = _rng.rand(m,n)

    def _check_factorization(self, factorize, check_rows_orthonormal):
        '''
        Run `factorize` on A, B and C and assert the three properties.

        check_rows_orthonormal: also assert Q*Q^T = I. This only holds
        when Q is square, i.e. for the full factorizations.
        '''
        for name in ('A', 'B', 'C'):
            M = getattr(self, name)
            # subTest reports which matrix failed and keeps going
            with self.subTest(matrix=name):
                Q, R = factorize(M)
                self.assertTrue(np.allclose(Q@R, M),
                                'Q @ R does not reproduce the matrix')
                self.assertTrue(np.allclose(Q.T@Q, np.eye(Q.shape[1])),
                                'columns of Q are not orthonormal')
                if check_rows_orthonormal:
                    self.assertTrue(np.allclose(Q@Q.T, np.eye(Q.shape[0])),
                                    'rows of Q are not orthonormal')
                self.assertTrue(np.allclose(R, np.triu(R)),
                                'R is not upper triangular')

    def test_modified_gram_schmidt(self):
        '''
        Note that we deleted a part of orthogonality test here.
        This is b/c we only require columns of Q are orthonormal
        when decomposing a non-square matrix.
        '''
        self._check_factorization(modified_gram_schmidt_QR,
                                  check_rows_orthonormal=False)

    def test_householder(self):
        self._check_factorization(householder_QR,
                                  check_rows_orthonormal=True)

    def test_givens(self):
        self._check_factorization(givens_QR,
                                  check_rows_orthonormal=True)

        

In [176]:
# Run the test suite in-process. A placeholder argv is passed so that
# unittest does not try to parse the host process's own command-line
# arguments (presumably the notebook kernel's -- confirm), and exit=False
# keeps unittest.main from calling sys.exit() when the run finishes.
if __name__ == '__main__':
    unittest.main(argv=['first-arg-is-ignored'], exit=False)

...
----------------------------------------------------------------------
Ran 3 tests in 0.013s

OK


### Using QR to find Eigenvalues: ###



### Using the QR factorization for Least Squares ###

We know that minimizing $\| A x - b \| ^ { 2 }$ gives us the following unique solution if $A$ has linearly independent columns:

$$
\hat { x } = A ^ { \dagger } b = \left( A ^ { T } A \right) ^ { - 1 } A ^ { T } b
$$

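Let's see how we can use the QR factorization to help in solving this (taken from http://www.seas.ucla.edu/~vandenbe/133A/lectures/ls.pdf). Here $A = QR$ is the reduced factorization: $Q$ is $m \times n$ with orthonormal columns, so $Q ^ { T } Q = I$, and $R$ is $n \times n$, upper triangular, and invertible because $A$ has linearly independent columns: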

$$
\begin{aligned} \hat { x } = \left( A ^ { T } A \right) ^ { - 1 } A ^ { T } b & = \left( ( Q R ) ^ { T } ( Q R ) \right) ^ { - 1 } ( Q R ) ^ { T } b \\ & = \left( R ^ { T } Q ^ { T } Q R \right) ^ { - 1 } R ^ { T } Q ^ { T } b \\ & = \left( R ^ { T } R \right) ^ { - 1 } R ^ { T } Q ^ { T } b \\ & = R ^ { - 1 } R ^ { - T } R ^ { T } Q ^ { T } b \\ & = R ^ { - 1 } Q ^ { T } b \end{aligned}
$$

Thus, the algorithm for finding the least squares solution is simply:

$$
\begin{array} { l } { \text { 1. compute QR factorization } A = Q R \quad \left( 2 m n ^ { 2 } \text { flops if } A \text { is } m \times n \right) } \\ { \text { 2. matrix-vector product } d = Q ^ { T } b \quad ( 2 m n \text { flops } ) } \\ { \text { 3. solve } R x = d \text { by back substitution }\quad \left( n ^ { 2 } \text { flops } \right) } \\ { \text { complexity: } 2 m n ^ { 2 } \text { flops } } \end{array}
$$