<a href="https://colab.research.google.com/github/spencer18001/Clustering-And-Dimensionality-Reduction---Deep-Dive/blob/main/06/0606.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Sparse matrices

In [None]:
import numpy as np
from scipy.sparse import csr_matrix
from scipy.sparse import coo_matrix
from scipy.sparse import csc_matrix

### COO format

In [None]:
# Small dense example matrix in which most entries are zero
dense_matrix = np.array(
    [
        [0, 0, 0, 1],
        [2, 0, 0, 3],
        [0, 4, 0, 0],
        [1, 2, 5, 0],
    ]
)

# COO ("coordinate") representation of the dense matrix
sparse_matrix_coo = coo_matrix(dense_matrix)

# COO keeps three parallel arrays: stored entry i is the value data[i]
# located at position (row[i], col[i]) of the matrix
print(f"Data array: {sparse_matrix_coo.data}")
print(f"Row Indices array: {sparse_matrix_coo.row}")
print(f"Column Indices array: {sparse_matrix_coo.col}")

Data array: [1 2 3 4 1 2 5]
Row Indices array: [0 1 1 2 3 3 3]
Column Indices array: [3 0 3 1 0 1 2]


In [None]:
# Perform row slicing
# COO matrices do not support slicing in the SciPy version that produced the
# recorded output of this cell, so the attempt raises a TypeError. The error is
# the point of the demonstration: catch it and print it, so that
# "Restart & Run All" still reaches the cells that follow.
try:
    coo_row_slice = sparse_matrix_coo[1:3, :]
except TypeError as exc:
    print(f"TypeError: {exc}")
else:
    # NOTE(review): newer SciPy releases may accept indexing on COO matrices;
    # if so, show the slice rather than finishing silently. Confirm against
    # the SciPy version in use.
    print(coo_row_slice.toarray())

TypeError: 'coo_matrix' object is not subscriptable

### CSR format

In [None]:
# Small dense example matrix in which most entries are zero
dense_matrix = np.array(
    [
        [0, 0, 0, 1],
        [2, 0, 0, 3],
        [0, 4, 0, 0],
        [1, 2, 5, 0],
    ]
)

# CSR ("compressed sparse row") representation of the dense matrix
sparse_matrix_csr = csr_matrix(dense_matrix)

# Show the dense matrix for reference, followed by the three CSR arrays
print(dense_matrix)
# data: the stored values, listed row by row
print(f"Data array: {sparse_matrix_csr.data}")
# indices: the column index of each stored value
print(f"Indices array: {sparse_matrix_csr.indices}")
# indptr: row boundaries -- row i owns data[indptr[i]:indptr[i + 1]],
# which for this matrix gives the ranges 0:1, 1:3, 3:4 and 4:7
print(f"Indptr array: {sparse_matrix_csr.indptr}")

[[0 0 0 1]
 [2 0 0 3]
 [0 4 0 0]
 [1 2 5 0]]
Data array: [1 2 3 4 1 2 5]
Indices array: [3 0 3 1 0 1 2]
Indptr array: [0 1 3 4 7]


In [None]:
# Select rows 1 and 2 (the stop index 3 is exclusive) across all columns
row_slice = sparse_matrix_csr[1:3, :]
# Densify the slice only for display
print("Row slice:", row_slice.toarray(), sep="\n")

Row slice:
[[2 0 0 3]
 [0 4 0 0]]


### CSC format

In [None]:
# Small dense example matrix in which most entries are zero
dense_matrix = np.array(
    [
        [0, 0, 0, 1],
        [2, 0, 0, 3],
        [0, 4, 0, 0],
        [1, 2, 5, 0],
    ]
)

# CSC ("compressed sparse column") representation of the dense matrix
sparse_matrix_csc = csc_matrix(dense_matrix)

# Show the three CSC arrays
# data: the stored values, listed column by column
print(f"Data array: {sparse_matrix_csc.data}")
# indices: the row index of each stored value
print(f"Indices array: {sparse_matrix_csc.indices}")
# indptr: column boundaries -- column j owns data[indptr[j]:indptr[j + 1]]
print(f"Indptr array: {sparse_matrix_csc.indptr}")

Data array: [2 1 4 2 5 1 3]
Indices array: [1 3 2 3 3 0 1]
Indptr array: [0 2 4 5 7]


### Useful methods

In [None]:
# Expand the sparse matrix back into a regular dense NumPy array
sparse_matrix_csc.toarray()

array([[0, 0, 0, 1],
       [2, 0, 0, 3],
       [0, 4, 0, 0],
       [1, 2, 5, 0]])

In [None]:
# Return the same matrix in COO (coordinate) format
sparse_matrix_csc.tocoo()

<4x4 sparse matrix of type '<class 'numpy.int64'>'
	with 7 stored elements in COOrdinate format>

In [None]:
# Request CSC format. The matrix is already stored as CSC, so the format is unchanged.
# NOTE(review): tocsr() may have been intended here to demonstrate a CSR conversion -- confirm.
sparse_matrix_csc.tocsc()

<4x4 sparse matrix of type '<class 'numpy.int64'>'
	with 7 stored elements in Compressed Sparse Column format>

In [None]:
# Some arithmetic operations leave explicit zeros stored inside a sparse
# matrix. eliminate_zeros() removes those stored zeros to save space; per the
# SciPy documentation it modifies the matrix in place.
# sparse_matrix_coo holds no stored zeros (see its data array printed in the
# COO section), so the call has nothing to remove here.
sparse_matrix_coo.eliminate_zeros()