# Sparse Data: data in which most elements are unused
 - Sparse Data: a data set in which most of the item values are zero.

 - Dense Array: the opposite of a sparse array; most of its values are not zero.

Two types of sparse matrices are covered here:

CSC - Compressed Sparse Column. For efficient arithmetic and fast column slicing.

CSR - Compressed Sparse Row. For fast row slicing and faster matrix-vector products.



In [2]:
# Build a CSR matrix from a flat (1-D) NumPy array:
import numpy as np
from scipy.sparse import csr_matrix
arr = np.array([0, 0, 0, 0, 0, 1, 2, 1, 0, 3])
row_matrix = csr_matrix(arr)  # the 1-D input is stored as a single row
print(row_matrix)

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 4 stored elements and shape (1, 10)>
  Coords	Values
  (0, 5)	1
  (0, 6)	2
  (0, 7)	1
  (0, 9)	3


In [4]:
import numpy as np
from scipy.sparse import csr_matrix
# Two identical rows: every nonzero of the pattern is stored once per row.
row_pattern = [0, 0, 0, 0, 0, 1, 2, 1, 0, 3]
arr = np.array([row_pattern, row_pattern])
print(csr_matrix(arr))

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 8 stored elements and shape (2, 10)>
  Coords	Values
  (0, 5)	1
  (0, 6)	2
  (0, 7)	1
  (0, 9)	3
  (1, 5)	1
  (1, 6)	2
  (1, 7)	1
  (1, 9)	3


In [5]:
# Sparse Matrix Methods
#  - The "data" property exposes the values the matrix actually stores;
#    the zeros of the dense input are not among them.
arr = np.array([[0,0,0],[0,0,1],[1,0,2]])
stored_values = csr_matrix(arr).data
print(stored_values)

[1 1 2]


In [13]:
# count_nonzero() reports how many entries of the matrix are nonzero:
arr = np.array([[0,0,0],[0,0,1],[1,0,2]])
nonzero_total = csr_matrix(arr).count_nonzero()
print(nonzero_total)

3


In [10]:
# Removing zero-entries from the matrix with the "eliminate_zeros()" method.
#
# csr_matrix(dense_array) never stores the zeros of the dense input, so calling
# eliminate_zeros() on such a matrix changes nothing. To see the method work,
# the matrix is built from (values, (row, col)) triplets that include one
# explicitly stored zero at position (0, 0).
arr = np.array([[0,0,0],[0,0,1],[1,0,2]])  # dense form the matrix represents
values = np.array([0, 1, 1, 2])            # first value is the explicit zero
row_idx = np.array([0, 1, 2, 2])
col_idx = np.array([0, 2, 0, 2])
mat = csr_matrix((values, (row_idx, col_idx)), shape=arr.shape)
stored_before = mat.nnz   # 4 -- the explicit zero counts as a stored element
mat.eliminate_zeros()     # in place; only the 3 nonzero entries remain stored
print(mat)

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 3 stored elements and shape (3, 3)>
  Coords	Values
  (1, 2)	1
  (2, 0)	1
  (2, 2)	2


In [11]:
# Eliminating duplicate entries with the "sum_duplicates()" method.
#
# A matrix converted from a dense array has at most one entry per position, so
# sum_duplicates() has nothing to merge there. Here the matrix is built from raw
# CSR arrays in which position (2, 2) is stored twice (1 + 1); sum_duplicates()
# merges both entries into a single stored value of 2.
arr = np.array([[0,0,0],[0,0,1],[1,0,2]])  # dense form the matrix represents
values = np.array([1, 1, 1, 1])
col_idx = np.array([2, 0, 2, 2])   # column of each stored value
row_ptr = np.array([0, 0, 1, 4])   # row i owns values[row_ptr[i]:row_ptr[i+1]]
mat = csr_matrix((values, col_idx, row_ptr), shape=arr.shape)
stored_before = mat.nnz   # 4 -- the duplicate is still stored separately
mat.sum_duplicates()      # in place; 3 stored entries remain
print(mat)

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 3 stored elements and shape (3, 3)>
  Coords	Values
  (1, 2)	1
  (2, 0)	1
  (2, 2)	2


In [12]:
# tocsc() converts a CSR matrix to Compressed Sparse Column format:
arr = np.array([[0,0,0],[0,0,1],[1,0,2]])
row_major = csr_matrix(arr)
newarr = row_major.tocsc()
print(newarr)

<Compressed Sparse Column sparse matrix of dtype 'int64'
	with 3 stored elements and shape (3, 3)>
  Coords	Values
  (2, 0)	1
  (1, 2)	1
  (2, 2)	2
