# TIES581 Project work

Mikael Myyrä  
`mikael.b.myyra@jyu.fi`

In this document I implement and test Krylov subspace methods for
linear systems using NumPy, based on the descriptions of Saad (2003).

# Test problems and library setup

Using numpy for matrix utilities and scipy to read the Harwell-Boeing matrix format.

The matrices are picked from the Harwell-Boeing and FIDAP collections on
[Matrix Market](https://math.nist.gov/MatrixMarket/).
Two of them, ORSIRR1 and FIDAP36, are also used by Saad (2003).
FIDAP005 is a smaller problem that is helpful in early testing
because you can reasonably print and read it.

Following Saad (2003) chapter 3.7, the right-hand side of $Ax = b$ is generated
as $b = Ae$ where $e = (1,1,\dots,1)^T$, and the initial guess $x_0$
is a vector of random values. Saad does not specify the range
or distribution of random values, so I am assuming the conventional
uniform distribution in the range $[0, 1)$.

In [1]:
import numpy as np
import scipy as sp
from scipy.io import mmread

import math
import time
from dataclasses import dataclass
from typing import Callable
from typing import Union

def default_rhs(A) -> np.ndarray:
    return A * np.ones((A.shape[1],))

def random_guess(A) -> np.ndarray:
    return np.random.random_sample((A.shape[1],))

@dataclass
class Equation:
    name: str
    A: np.ndarray
    b: np.ndarray

    def residual(self, x: np.ndarray) -> np.ndarray:
        return self.b - self.A * x

TEST_EQUATIONS: list[Equation] = [
    Equation(name=name, A=mat, b=default_rhs(mat))
    for name, mat in [
        ("FIDAP005", mmread("test_matrices/fidap005.mtx")),
        ("FIDAP036", mmread("test_matrices/fidap036.mtx")),
        ("GR3030", mmread("test_matrices/gr_30_30.mtx")),
        ("ORSIRR1", mmread("test_matrices/orsirr_1.mtx")),
    ]
]

MAX_ITERATIONS = 300

@dataclass
class RunResult:
    ans: np.ndarray
    iterations: int

def test_run(method: Callable[[Equation, np.ndarray], np.ndarray], **kwargs):
    """Run the given method on all the test problems and print statistics."""

    # pretty-printing results as a table
    CELL_SIZE = 16
    def fmt_cell(x: Union[str, float]) -> str:
        text = f"{x:.3e}" if isinstance(x, float) else str(x)
        return text.center(CELL_SIZE)
    def print_row(cells: list[Union[str, float]]):
        print("|".join([fmt_cell(c) for c in cells]))

    headers = ["matrix", "residual norm", "iterations", "time (ms)"]
    print_row(headers)
    print_row(["-" * CELL_SIZE] * len(headers))
    for eq in TEST_EQUATIONS:
        start_time = time.perf_counter_ns()
        result = method(eq, random_guess(eq.A), **kwargs)
        duration_ns = time.perf_counter_ns() - start_time
        duration_ms = duration_ns // 1000000
        resid = np.linalg.norm(eq.b - eq.A * result.ans)
        print_row([eq.name, resid, result.iterations, duration_ms])

np.set_printoptions(precision=5)

# Methods

## Full Orthogonalization Method (FOM)

FOM (or Arnoldi's Method) is an algorithm to approximately solve
a linear system $Ax = b$ using a Krylov subspace

$$
\mathcal{K}_m = \text{span}\{r_0, Ar_0, A^2r_0, \dots, A^{m-1}r_0\}
$$

where $m$ is the dimension of the subspace and $r_0 = b - Ax_0$ is the residual
of some initial guess $x_0$. $\mathcal{K}_m$ is related to the original problem
space by

$$
V_m^TAV_m = H_m
$$

where $V_m$ is an orthonormal basis of $\mathcal{K}_m$ and $H_m$ is a
$m \times m$ Hessenberg matrix. Arnoldi's method computes these matrices.

The benefit of this approach is that a problem based on $H_m$ is
smaller than the original problem (controllable by the choice of subspace dimension $m$)
and its Hessenberg structure makes it easier to solve with direct methods.
The tradeoff is that the smaller $m$ is, the less accurate the solution will be.

Getting an accurate result in one iteration of FOM requires working in a
high-dimensional Krylov subspace, which has a computational complexity of $O(m^2)$ due to
orthogonalization requiring dot products with all previously computed basis vectors.
Saad (2003) presents two variants of FOM to alleviate this, the Restarted and Incomplete versions.

### Restarted FOM

Restarted FOM simply runs FOM repeatedly with a small $m$ until a desired
precision is achieved.


In [2]:
def restarted_fom(eq: Equation, x0: np.ndarray, subsp_dim: int) -> RunResult:
    """Approximately solve `Ax = b` using the Full Orthogonalization Method
    with Krylov subspace dimension `subsp_dim`."""

    # Krylov subspace can't be larger than the column count of A
    if eq.A.shape[1] < subsp_dim:
        subsp_dim = eq.A.shape[1]

    # stop condition from Saad (2003): reduce residual norm by a factor of 10^7
    initial_resid_norm = np.linalg.norm(eq.residual(x0))
    stop_resid_limit = 1e-7 * initial_resid_norm
    x = x0
    iter_count = 0
    while iter_count < MAX_ITERATIONS:
        iter_count += 1
        # current residual
        resid = eq.residual(x)
        resid_norm = np.linalg.norm(resid)
        # stop if we're close enough to the correct answer
        # or the algorithm failed and diverged to infinity (this happens with FIDAP036)
        if resid_norm < stop_resid_limit or math.isinf(resid_norm):
            break
        # orthonormal basis of the Krylov subspace,
        # filled in over the course of the algorithm
        V = np.zeros((eq.A.shape[1], subsp_dim))
        # the Hessenberg matrix H in the relation (V^T)AV = H
        H = np.zeros((subsp_dim, subsp_dim))
        # first basis vector of the Krylov subspace based on the residual
        V[:,0] = resid / resid_norm

        for col in range(subsp_dim):
            # the next vector in the Krylov subspace's basis
            w = eq.A * V[:,col]
            # orthogonalize using Modified Gram-Schmidt
            for prev_col in range(col+1):
                H[prev_col, col] = w.dot(V[:,prev_col])
                w -= H[prev_col, col] * V[:,prev_col]
            if col+1 == subsp_dim:
                break
            H[col+1, col] = np.linalg.norm(w)
            if H[col+1, col] < 1e-10:
                # terminated early because the Krylov subspace's actual dimension
                # is less than what was given as parameter.
                # resize the matrices H and V so that they're not singular if this happens
                H = H[:col+1, :col+1]
                V = V[:, :col+1]
                break
            V[:,col+1] = w / H[col+1, col]

        # Using a prebuilt routine for solving the Hessenberg system for now.
        # I know Saad (2003) has a method for this, but that seems less relevant than
        # trying many different methods, so I'll leave it for later if I have time.
        h_rhs = np.zeros((H.shape[1], 1))
        h_rhs[0] = resid_norm
        y = np.linalg.solve(H, h_rhs)

        # y and its product with V are 2D column vectors,
        # but the rest of the code works in 1D vectors, hence the weird indexing here
        x += np.dot(V, y)[:,0]

    return RunResult(
        ans=x,
        iterations=iter_count,
    )

In [3]:
for dim in [10, 30, 50]:
    print(f"Subspace dimension: {dim}")
    test_run(restarted_fom, subsp_dim=dim)
    print("")

Subspace dimension: 10
     matrix     | residual norm  |   iterations   |   time (ms)    
----------------|----------------|----------------|----------------
    FIDAP005    |   1.192e+01    |      300       |      181       
    FIDAP036    |   1.511e+03    |      300       |      684       
     GR3030     |   7.092e-06    |       13       |       10       
    ORSIRR1     |   1.249e+02    |      300       |      334       

Subspace dimension: 30
     matrix     | residual norm  |   iterations   |   time (ms)    
----------------|----------------|----------------|----------------
    FIDAP005    |   1.384e-09    |       2        |       3        
    FIDAP036    |   5.544e+39    |      300       |      2622      
     GR3030     |   4.734e-07    |       4        |       15       
    ORSIRR1     |   5.426e-02    |       73       |      361       

Subspace dimension: 50
     matrix     | residual norm  |   iterations   |   time (ms)    
----------------|----------------|-----------

### Direct Incomplete Orthogonalization Method (DIOM)

The basic idea of incomplete orthogonalization is simple: reduce the cost
of orthogonalization by only comparing against the last $k$ computed basis vectors.
However, this only reduces computational costs while keeping memory costs
the same. With some additional work, a special LU factorization can be created
that does not need to store the entire basis $V_m$ in memory.
This is the Direct variant detailed in chapter 6.4.2 of Saad (2003).


In [7]:
from collections import deque

def direct_iom(eq: Equation, x0: np.ndarray, ortho_count: int) -> RunResult:
    """Approximately solve `Ax = b` using the Direct Incomplete
    Orthogonalization Method, orthogonalizing against `ortho_count` vectors."""

    initial_resid = eq.b - eq.A * x0
    initial_resid_norm = np.linalg.norm(initial_resid)
    stop_resid_limit = 1e-7 * initial_resid_norm

    # for state, storing bounded deques of the last `ortho_count` values computed.
    # this automatically handles dropping values we don't need anymore.
    def empty_deque() -> deque:
        return deque([], ortho_count)

    # V and H as in FOM (basis of K_m and problem in K_m, respectively)
    V = empty_deque()
    # first basis vector from the initial residual
    V.append(initial_resid / initial_resid_norm)
    H = empty_deque()
    # LU factorization of H
    L = empty_deque()
    U = empty_deque()
    # zeta = L^{-1}(\beta e_1)
    zeta = empty_deque()
    # P = VU^{-1}
    P = empty_deque()
    
    x = x0
    step_idx = 0
    # stop if we cover all of the dimension of A without converging
    while step_idx < eq.A.shape[1]:
        # check convergence
        resid_norm = np.linalg.norm(eq.residual(x))
        if resid_norm < stop_resid_limit or math.isinf(resid_norm):
            break

        # we only store `ortho_count` columns of each matrix at a time,
        # so indices into the state arrays are offset from "real" indices by this much
        first_stored_col = max(0, step_idx - ortho_count)

        next_v = eq.A * V[-1]
        # only store the nonzero entries of H, of which there are at most
        # `ortho_count + 1` because H is banded Hessenberg
        next_h = np.zeros(ortho_count + 1)
        for col in range(len(V)):
            next_h[col] = next_v.dot(V[col])
            next_v -= next_h[col] * V[col]
        # entry of H below the main diagonal
        next_h[len(V)] = np.linalg.norm(next_v)
        
        V.append(next_v)
        H.append(next_h)

        # TODO: LU factorization how?

        next_zeta = -L[-1] * zeta[-1]
        zeta.append(next_zeta)

        diag_idx = step_idx - first_stored_col
        next_p = (
            (1.0 / U[-1][diag_idx])
            * (V[-1] - sum([U[-1][i] * P[i] for i in range(diag_idx)]))
        )
        P.append(next_p)

        x += next_zeta * next_p
        step_idx += 1

    return RunResult(
        ans=x,
        iterations=iter_count,
    )

In [8]:
for ortho_count in [5, 10, 30]:
    print(f"Orthogonal vectors: {ortho_count}")
    test_run(direct_iom, ortho_count=ortho_count)
    print("")

Orthogonal vectors: 5
     matrix     | residual norm  |   iterations   |   time (ms)    
----------------|----------------|----------------|----------------


IndexError: deque index out of range

# Sources

Saad, Y. (2003). Iterative Methods for Sparse Linear Systems.