# Sparse Linear Algebra
- Sparse solvers, Eigenvalues, Iterative methods
- Real examples: Network analysis, Solving large systems

In [1]:
import numpy as np
from scipy import sparse
from scipy.sparse import linalg as sp_linalg
import matplotlib.pyplot as plt
print('Sparse linear algebra module loaded')

Sparse linear algebra module loaded


## Sparse Linear System Solving

**Problem**: Solve Ax = b where A is sparse

**Methods**:
- **Direct**: `spsolve()` - LU factorization
- **Iterative**: `cg()`, `gmres()`, `bicgstab()` - for very large systems

**When to use**:
- Direct: General, moderate size
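- Iterative: Very large systems (`cg()` requires a symmetric positive definite A; `gmres()` and `bicgstab()` also handle non-symmetric A)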

In [2]:
# Linear system: Ax = b
n = 1000
# Create sparse positive definite matrix
diag_vals = np.random.rand(n) + 2  # Diagonal dominant
A = sparse.diags([[-1]*(n-1), diag_vals, [-1]*(n-1)], [-1, 0, 1], format='csr')
b = np.random.rand(n)

print(f'Solving Ax = b')
print(f'  Matrix size: {n}×{n}')
print(f'  Non-zeros: {A.nnz:,}')
print(f'  Density: {A.nnz/n**2*100:.3f}%\n')

# Direct solver
import time
start = time.time()
x_direct = sp_linalg.spsolve(A, b)
time_direct = time.time() - start

print(f'Direct solver (spsolve):')
print(f'  Time: {time_direct*1000:.2f} ms')
print(f'  Residual: {np.linalg.norm(A @ x_direct - b):.2e}\n')

# Iterative solver (Conjugate Gradient)
start = time.time()
x_iter, info = sp_linalg.cg(A, b, tol=1e-10)
time_iter = time.time() - start

print(f'Iterative solver (CG):')
print(f'  Time: {time_iter*1000:.2f} ms')
print(f'  Residual: {np.linalg.norm(A @ x_iter - b):.2e}')
print(f'  Convergence: {"Success" if info == 0 else "Failed"}')

Solving Ax = b
  Matrix size: 1000×1000
  Non-zeros: 2,998
  Density: 0.300%

Direct solver (spsolve):
  Time: 1.30 ms
  Residual: 6.94e-15



TypeError: cg() got an unexpected keyword argument 'tol'. Did you mean 'rtol'?

## Eigenvalue Problems

**Problem**: Find eigenvalues/eigenvectors of sparse matrix

**Function**: `eigsh()` (symmetric), `eigs()` (general)

**Parameters**:
- `k`: Number of eigenvalues
- `which`: 'LM' (largest magnitude), 'SM' (smallest magnitude), 'LA' (largest algebraic)

**Use**: PageRank, spectral clustering, PCA

In [None]:
# Symmetric sparse matrix
# Times eigsh on a random symmetric matrix and lists the k eigenvalues of
# largest magnitude.
import time  # imported here so the cell does not depend on an earlier cell

n = 500
A = sparse.random(n, n, density=0.01, format='csr')
A = A + A.T  # Make symmetric

print('Eigenvalue problem')
print(f'  Matrix size: {n}×{n}')
print(f'  Non-zeros: {A.nnz:,}\n')

# Compute the 5 eigenvalues of largest magnitude. which='LM' ranks by |λ|,
# so the selection may contain both positive and negative eigenvalues.
k = 5
start = time.perf_counter()
eigenvalues, eigenvectors = sp_linalg.eigsh(A, k=k, which='LM')
time_eigen = time.perf_counter() - start

# Sort explicitly by decreasing |λ| for display, so the listing follows the
# selection criterion instead of whatever order eigsh happened to return.
by_magnitude = eigenvalues[np.argsort(-np.abs(eigenvalues))]

print(f'Computed {k} largest-magnitude eigenvalues:')
for i, eig in enumerate(by_magnitude, 1):
    print(f'  λ{i} = {eig:.4f}')
print(f'\nTime: {time_eigen*1000:.2f} ms')

## Real Example: PageRank Algorithm

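**Problem**: Rank web pages by importance  
**Method**: Find the dominant eigenvector of the damped transition matrix — computed below by solving the equivalent sparse linear system (I − d·Pᵀ)x = ((1 − d)/n)·1, where d is the damping factor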

**Google's original algorithm!**

In [None]:
# Simulate web graph
# A random directed graph is generated, turned into a row-normalised
# transition matrix P, and the damped PageRank system is solved directly.
n_pages, avg_links = 1000, 10

print('PageRank Calculation')
print(f'  Pages: {n_pages:,}')
print(f'  Avg outlinks: {avg_links}\n')

# Build link matrix (sparse): one row per source page, one column per target.
# Each page draws a Poisson-distributed number of distinct targets.
np.random.seed(42)
source_chunks, target_chunks = [], []
for page in range(n_pages):
    n_links = np.random.poisson(avg_links)
    links = np.random.choice(n_pages, size=min(n_links, n_pages-1), replace=False)
    source_chunks.append(np.full(links.size, page))
    target_chunks.append(links)
rows = np.concatenate(source_chunks)
cols = np.concatenate(target_chunks)

links_matrix = sparse.coo_array(
    (np.ones(rows.size), (rows, cols)),
    shape=(n_pages, n_pages),
)
links_csr = links_matrix.tocsr()

link_density = links_csr.nnz/n_pages**2*100
print('Link matrix:')
print(f'  Links: {links_csr.nnz:,}')
print(f'  Density: {link_density:.3f}%\n')

# Normalize (transition probability): divide every stored entry by the
# out-degree of its row. Pages without outlinks get a divisor of 1 so the
# division is well defined (their rows hold no entries anyway).
out_degree = np.asarray(links_csr.sum(axis=1)).ravel()
out_degree = np.where(out_degree == 0, 1, out_degree)
entries_per_row = np.diff(links_csr.indptr)
P = links_csr.copy()
P.data /= np.repeat(out_degree, entries_per_row)

# PageRank with damping: solve (I - damping * P^T) x = (1 - damping) / n
damping = 0.85
I = sparse.eye(n_pages, format='csr')
A = I - damping * P.T
b = np.full(n_pages, (1 - damping) / n_pages)

# Solve, then rescale so the ranks sum to one
pagerank = sp_linalg.spsolve(A, b)
pagerank /= pagerank.sum()

print('PageRank computed!')
print('\nTop 10 pages:')
# Ascending argsort read backwards from the end: the ten highest-ranked pages
top_10 = np.argsort(pagerank)[:-11:-1]
for position, page_id in enumerate(top_10, start=1):
    print(f'  {position}. Page {page_id}: rank={pagerank[page_id]:.6f}')

## Matrix Norms

**Sparse norms**:
- Frobenius: `norm(A, 'fro')`
- Spectral: Largest singular value
- 1-norm, inf-norm

**Efficient for sparse matrices**

In [None]:
# Random 100×100 sparse test matrix with 5% of its entries stored
A = sparse.random(100, 100, density=0.05, format='csr')

frobenius = sp_linalg.norm(A, "fro")
print('Matrix norms:')
print(f'  Frobenius: {frobenius:.4f}')

# The spectral norm is the largest singular value, so a single singular
# triplet from svds is enough. The size guard restricts this to small matrices.
if not A.shape[0] >= 200:
    u, s, vt = sp_linalg.svds(A, k=1)
    spectral = s[0]
    print(f'  Spectral: {spectral:.4f}')

## Real Example: Heat Equation (Finite Differences)

**Problem**: Solve heat diffusion on grid
**Discretization**: Laplacian matrix (sparse!)

**Applications**: Physics simulation, image processing

In [None]:
# 2D heat equation: du/dt = α * Laplacian(u)
# Explicit time stepping on an n×n grid that is stored row by row in a flat
# vector: grid point (row, col) lives at flat index row * n + col.
n = 30  # Grid size
alpha = 0.01  # Diffusivity
dt = 0.1
n_steps = 100  # Number of explicit time steps

print('2D Heat Equation')
print(f'  Grid: {n}×{n}')
print(f'  Total points: {n**2}\n')

# Build 2D Laplacian matrix (5-point stencil)
N = n * n
center_diag = -4 * np.ones(N)  # Center
# Left/right neighbours sit on the ±1 diagonals (length N-1). Entry k of
# either diagonal couples flat indices k and k+1. When k is the last column
# of a grid row, k+1 is the first column of the next row, so that coupling
# is removed on BOTH diagonals (no wrap-around, and the matrix stays symmetric).
left_right = np.ones(N - 1)
left_right[n - 1::n] = 0
# Up/down neighbours sit on the ±n diagonals and never wrap.
up_down = np.ones(N - n)
diagonals = [center_diag, left_right, left_right, up_down, up_down]

Laplacian = sparse.diags(diagonals, [0, -1, 1, -n, n], format='csr')
# Drop any explicitly stored zeros so that nnz counts true non-zeros only
Laplacian.eliminate_zeros()

print('Laplacian matrix:')
print(f'  Size: {N}×{N}')
print(f'  Non-zeros: {Laplacian.nnz:,}')
print('  Non-zeros per row: ~5 (sparse!)\n')

# Initial condition: hot spot in center
u = np.zeros(N)
# Flat index of grid point (n//2, n//2). N // 2 would land in column 0 of
# the middle row whenever n is even, i.e. on the edge of the grid.
center = (n // 2) * n + n // 2
u[center] = 100.0

# Time evolution: u(t+dt) = u(t) + dt * alpha * Laplacian @ u(t)
I = sparse.eye(N, format='csr')
A = I + dt * alpha * Laplacian

print('Simulating heat diffusion...')
for step in range(n_steps):
    u = A @ u

# Reshape to 2D
u_grid = u.reshape(n, n)

print(f'After {n_steps} time steps:')
print(f'  Max temperature: {u_grid.max():.2f}')
print(f'  Min temperature: {u_grid.min():.2f}')
print(f'  Average: {u_grid.mean():.2f}')

## Summary

### Sparse Linear Solvers:
```python
from scipy.sparse import linalg as sp_linalg

# Direct solver
x = sp_linalg.spsolve(A, b)

# Iterative solvers
x, info = sp_linalg.cg(A, b)  # Conjugate gradient (symmetric)
x, info = sp_linalg.gmres(A, b)  # GMRES (general)
x, info = sp_linalg.bicgstab(A, b)  # BiCGSTAB
```

### Eigenvalue Problems:
```python
# Symmetric
eigenvalues, eigenvectors = sp_linalg.eigsh(A, k=5, which='LM')

# General
eigenvalues, eigenvectors = sp_linalg.eigs(A, k=5, which='LM')
```

### Norms and SVD:
```python
# Norms
norm_fro = sp_linalg.norm(A, 'fro')

# SVD (k largest singular values)
u, s, vt = sp_linalg.svds(A, k=10)
```

### Applications:
✓ **PageRank**: Eigenvector of web graph  
✓ **PDEs**: Finite differences/elements  
✓ **Image**: Laplacian for edge detection  
✓ **ML**: Large-scale regression, PCA  
✓ **Networks**: Centrality, community detection  

### Performance:
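- **Direct**: O(n³) for dense matrices; for sparse matrices the cost depends on the fill-in created during factorization and is often far lower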
- **Iterative**: O(nnz) per iteration
- **Eigenvalues**: Much faster than dense