# CSR: Compressed Sparse Row für dünnbesetzte Matrizen

In [6]:
import numpy as np

In [7]:
# Dense 4x5 example matrix: most entries are zero and the second row is
# entirely zero, which makes it a useful test case for a sparse format.
matrix = np.array([[1, 4, 0, 0, 1],
                    [0, 0, 0, 0, 0],
                    [0, 5, 6, 0, 0],
                    [0, 0, 3, 0, 0]])
matrix


array([[1, 4, 0, 0, 1],
       [0, 0, 0, 0, 0],
       [0, 5, 6, 0, 0],
       [0, 0, 3, 0, 0]])

In [8]:
# Dense vector with five entries, one per column of the example matrix.
vector = np.array([3, 4, 5, 6, 7])
vector

array([3, 4, 5, 6, 7])

In [9]:
# Dense matrix-vector product (reference result):
# for each row of the matrix, multiply its elements pairwise with the
# vector and sum the products; the result has one entry per matrix row.
matrix @ vector

array([26,  0, 50, 15])

In [10]:
# CSR representation of the example matrix
# val: the non-zero values in matrix order (row by row)
val = np.array([1, 4, 1, 5, 6, 3])
# col_index: the column in which the corresponding entry of val is located
col_index = np.array([0, 1, 4, 1, 2, 2])
# row_ptr: the index into val/col_index at which each row starts.
# A row containing only zeros gets the start index of the next row.
# One additional trailing element holds the number of non-zero values.
row_ptr = np.array([0, 3, 3, 5, 6])

In [29]:
# Sparse matrix-vector product computed directly on the CSR arrays.
# row_ptr has one more entry than the matrix has rows, so the result gets
# len(row_ptr) - 1 entries (the first dimension of the matrix).
result = np.zeros(len(row_ptr) - 1)
# Consecutive row_ptr entries delimit the slice of val/col_index that belongs
# to one row: it starts at `start` and ends just before `stop`.
for row, (start, stop) in enumerate(zip(row_ptr[:-1], row_ptr[1:])):
    # An all-zero row has start == stop, so its result entry stays 0.
    for ptr in range(start, stop):
        # col_index tells us which vector entry pairs with this stored value.
        result[row] += val[ptr] * vector[col_index[ptr]]
result



array([26.,  0., 50., 15.])

In [12]:
def to_csr(matrix):
    """Convert a dense 2-D array into its CSR representation.

    The matrix is scanned row by row, left to right. Every non-zero entry
    is recorded together with the column it was found in, and after each
    row the running count of stored entries is appended to the row pointer.

    Args:
        matrix: Object with a two-element ``shape`` that supports
            ``matrix[y, x]`` indexing (e.g. a 2-D NumPy array).

    Returns:
        A tuple ``(val, col_index, row_ptr)`` of plain lists. ``row_ptr``
        starts with 0 and has one entry more than the matrix has rows.
    """
    val, col_index = [], []
    row_ptr = [0]

    for y in range(matrix.shape[0]):
        for x in range(matrix.shape[1]):
            entry = matrix[y, x]
            if not (entry != 0):
                continue  # zeros are not stored in CSR
            val.append(entry)
            col_index.append(x)
        # The number of values stored so far marks where the next row begins.
        row_ptr.append(len(val))

    return val, col_index, row_ptr
        


In [13]:
# Convert the dense example matrix; the result should match the hand-written
# val, col_index and row_ptr arrays.
to_csr(matrix)

([1, 4, 1, 5, 6, 3], [0, 1, 4, 1, 2, 2], [0, 3, 3, 5, 6])

In [19]:
# Keep the (val, col_index, row_ptr) tuple so it can be converted back to dense form.
csr = to_csr(matrix)

In [27]:
def to_dense(csr, n_cols=None):
    """Expand a CSR triple back into a dense 2-D NumPy array.

    Args:
        csr: Tuple ``(val, col_index, row_ptr)`` as returned by ``to_csr``.
        n_cols: Number of columns of the dense matrix. CSR itself does not
            store the column count, so when this is omitted the width is
            inferred as the largest stored column index plus one. In that
            case trailing all-zero columns cannot be recovered, and a matrix
            without any stored value gets zero columns.

    Returns:
        A 2-D array with ``len(row_ptr) - 1`` rows and ``n_cols`` columns.

    Raises:
        ValueError: If ``n_cols`` is smaller than the width required by the
            stored column indices.
    """
    val, col_index, row_ptr = csr

    # default=-1 keeps max() from raising when no value is stored at all.
    min_cols = int(max(col_index, default=-1)) + 1
    if n_cols is None:
        n_cols = min_cols
    elif n_cols < min_cols:
        raise ValueError(
            f"n_cols={n_cols} is too small, column indices require at least {min_cols}"
        )

    rows = []
    # Consecutive row_ptr entries delimit the stored entries of one row.
    for start, stop in zip(row_ptr[:-1], row_ptr[1:]):
        row = [0] * n_cols
        for ptr in range(start, stop):
            row[col_index[ptr]] = val[ptr]
        rows.append(row)

    # reshape guarantees a 2-D result even when there are no rows or no columns.
    return np.array(rows).reshape(len(rows), n_cols)


In [28]:
# Round-trip check: expanding the CSR tuple should reproduce the dense example matrix.
to_dense(csr)

array([[1, 4, 0, 0, 1],
       [0, 0, 0, 0, 0],
       [0, 5, 6, 0, 0],
       [0, 0, 3, 0, 0]])

In [30]:
def to_csr_np(matrix):
    """Convert a dense 2-D NumPy array to CSR form using vectorised NumPy calls.

    Args:
        matrix: 2-D NumPy array.

    Returns:
        A tuple ``(val, col_index, row_ptr)`` of NumPy arrays: the non-zero
        values in row-major order, the column of each value, and the start
        offset of every row followed by the total number of stored values.
    """
    # nonzero() yields the coordinates in row-major order, which is exactly
    # the order CSR stores its values in.
    rows, col_index = matrix.nonzero()
    val = matrix[rows, col_index]
    # Count entries that are != 0 per row (not > 0), so negative values are
    # counted too and row_ptr stays consistent with val and col_index.
    per_row = np.count_nonzero(matrix, axis=1)
    row_ptr = np.concatenate([[0], per_row.cumsum()])
    return val, col_index, row_ptr
    