<a href="https://colab.research.google.com/github/stephenbeckr/randomized-algorithm-class/blob/master/Demos/demo04_FrobeniusNorm_sparse.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Demo 4: calculating the Frobenius norm, looping over rows vs columns, **sparse** matrices

Demonstrates the effect of stride length, and of row- vs. column-based storage

This is similar to Demo 3, but now with sparse matrices, not dense matrices

Stephen Becker, Aug 2021, APPM 5650 Randomized Algorithms, University of Colorado Boulder

In [6]:
import numpy as np
import scipy.sparse
import scipy.sparse.linalg
rng = np.random.default_rng(12345)

In [34]:
def FrobeniusNormByRow(A, use_blas = True):
  """ Frobenius norm of A, accumulated one row at a time.

  Outer loop over rows (inner loop over columns).

  Parameters
  ----------
  A : 2D numpy array or scipy.sparse matrix
    Entries may be real or complex.
  use_blas : bool, optional (default True)
    True:  take the norm of each row slice with a library routine
           (scipy.sparse.linalg.norm if A is sparse, np.linalg.norm otherwise).
    False: add up |entry|^2 one entry at a time in a Python loop; for sparse A
           only the nonzeros reported by scipy.sparse.find are visited.

  Returns
  -------
  The square root of the sum of |A[i,j]|^2 over all entries of A.
  """
  m,n = A.shape
  is_sparse = scipy.sparse.issparse(A)
  nrm = 0.
  if use_blas:
    norm = scipy.sparse.linalg.norm if is_sparse else np.linalg.norm
    for row in range(m):
      # A[row,:] is a single row, so this is the Euclidean norm of that row
      # (not the Frobenius norm of A); the squared row norms sum to ||A||_F^2
      nrm += norm( A[row,:] )**2
  elif is_sparse:
    for row in range(m):
      _,_,v = scipy.sparse.find(A[row,:])  # v holds the nonzero values of this row
      for vi in v:
        # abs() so a complex entry contributes |vi|^2 rather than vi^2
        # (for real entries the result is unchanged)
        nrm += abs(vi)**2
  else:
    for row in range(m):
      for col in range(n):
        nrm += abs(A[row,col])**2
  return np.sqrt(nrm)

def FrobeniusNormByColumn(A, use_blas = True):
  """ Frobenius norm of A, accumulated one column at a time.

  Outer loop over columns (inner loop over rows).

  Parameters
  ----------
  A : 2D numpy array or scipy.sparse matrix
    Entries may be real or complex.
  use_blas : bool, optional (default True)
    True:  take the norm of each column slice with a library routine
           (scipy.sparse.linalg.norm if A is sparse, np.linalg.norm otherwise).
    False: add up |entry|^2 one entry at a time in a Python loop; for sparse A
           only the nonzeros reported by scipy.sparse.find are visited.

  Returns
  -------
  The square root of the sum of |A[i,j]|^2 over all entries of A.
  """
  m,n = A.shape
  is_sparse = scipy.sparse.issparse(A)
  nrm = 0.
  if use_blas:
    norm = scipy.sparse.linalg.norm if is_sparse else np.linalg.norm
    for col in range(n):
      # A[:,col] is a single column, so this is the Euclidean norm of that column
      # (not the Frobenius norm of A); the squared column norms sum to ||A||_F^2
      nrm += norm( A[:,col] )**2
  elif is_sparse:
    for col in range(n):
      _,_,v = scipy.sparse.find(A[:,col])  # v holds the nonzero values of this column
      for vi in v:
        # abs() so a complex entry contributes |vi|^2 rather than vi^2
        # (for real entries the result is unchanged)
        nrm += abs(vi)**2
  else:
    for col in range(n):
      for row in range(m):
        nrm += abs(A[row,col])**2
  return np.sqrt(nrm)

#### Run some experiments

In [4]:
# Problem size: a square m x n sparse matrix; `density` is the fraction of
# entries that scipy.sparse.random fills in
n   = int(1e4)
m   = n
density   = 0.01

# Draw from the notebook's seeded generator `rng` (defined in the imports cell)
# so the matrix, and therefore the norms printed below, are reproducible
A   = scipy.sparse.random( m, n, density, format='csc', random_state=rng) # Compressed Sparse Column

In [15]:
# Reference value: norm of the whole sparse matrix from a single library call.
# The loop-based results computed in the following cells are compared against `nrm`.
# %time nrm = np.linalg.norm(A) # doesn't work if A is sparse
%time nrm = scipy.sparse.linalg.norm(A) # use this instead
print(f'The true norm is {nrm:.6e}')

CPU times: user 5.96 ms, sys: 52 µs, total: 6.02 ms
Wall time: 6.59 ms
The true norm is 5.769165e+02


In [24]:
# Time the row-by-row accumulation (library norm applied to each row slice of
# the CSC-format A), then report how far the result is from the reference `nrm`
%time nrmRow = FrobeniusNormByRow(A, use_blas = True)
print(f'Looping over rows, the discrepancy in the norm is {nrmRow-nrm:.8e}')

CPU times: user 34.8 s, sys: 16.8 ms, total: 34.8 s
Wall time: 34.7 s
Looping over rows, the discrepancy in the norm is 5.68434189e-13


In [29]:
%time nrmRow = FrobeniusNormByColumn(A, use_blas = True)
print(f'Looping over columns, the discrepancy in the norm is {nrmRow-nrm:.8e}')

CPU times: user 2.99 s, sys: 9.95 ms, total: 3 s
Wall time: 3 s
Looping over columns, the discrepancy in the norm is -1.13686838e-13


### Repeat the experiment without using BLAS
Let's make the matrix smaller so we don't have to wait so long

Here there is less difference, because there's already a lot of overhead just due to the `for` loop (since Python isn't compiled)

In [40]:
n   = int(4e3)
m   = n
density   = 0.02

A   = scipy.sparse.random( m, n, density, format='csc') # Compressed Sparse Column

# %time nrm = np.linalg.norm(A) # doesn't work if A is sparse
%time nrm = scipy.sparse.linalg.norm(A) # use this instead
print(f'The true norm is {nrm-n:.6f} + ', n)

CPU times: user 2.58 ms, sys: 24 µs, total: 2.61 ms
Wall time: 2.62 ms
The true norm is -3673.326477 +  4000


In [41]:
# Row-wise accumulation on the CSC-format A: first with the library norm on each row slice ...
%time nrmRow = FrobeniusNormByRow(A, use_blas = True)
print(f'Looping over rows, the discrepancy in the norm is {nrmRow-nrm:.8e}')

# ... then with the pure-Python sum over the nonzero entries of each row
%time nrmRow = FrobeniusNormByRow(A, use_blas = False)
print(f'Looping over rows (no BLAS), the discrepancy in the norm is {nrmRow-nrm:.8e}')

CPU times: user 5.48 s, sys: 2.17 ms, total: 5.49 s
Wall time: 5.48 s
Looping over rows, the discrepancy in the norm is 2.27373675e-13
CPU times: user 5.43 s, sys: 3.25 ms, total: 5.43 s
Wall time: 5.43 s
Looping over rows (no BLAS), the discrepancy in the norm is 1.19371180e-12


In [42]:
%time nrmRow = FrobeniusNormByColumn(A, use_blas = True)
print(f'Looping over columns, the discrepancy in the norm is {nrmRow-nrm:.8e}')

%time nrmRow = FrobeniusNormByColumn(A, use_blas = False)
print(f'Looping over columns (no BLAS), the discrepancy in the norm is {nrmRow-nrm:.8e}')

CPU times: user 1.42 s, sys: 49.9 ms, total: 1.47 s
Wall time: 1.4 s
Looping over columns, the discrepancy in the norm is 1.70530257e-13
CPU times: user 1.31 s, sys: 30.7 ms, total: 1.34 s
Wall time: 1.31 s
Looping over columns (no BLAS), the discrepancy in the norm is 6.25277607e-13
