# Sparse Matrices

In [3]:
import numpy as np
import scipy.sparse as sps

# Coordinate triplets: entry k has value data[k] at position (rows[k], cols[k]).
# Position (1, 2) is listed twice (values 4 and 5); the cells below examine
# how each sparse format treats that repeated position.
rows = [0, 1, 1, 0, 4, 3]
cols = [4, 2, 2, 0, 1, 1]
data = [9, 4, 5, 1, 2, 2]

In [8]:
# Build a DOK (dictionary-of-keys) matrix.
# The constructor is given only the shape; the entries are then assigned
# one position at a time, so a repeated (row, col) position ends up
# holding the last value written to it.
dok = sps.dok_matrix((5, 5), dtype=float)
for position, value in zip(zip(rows, cols), data):
    dok[position] = value

print("dok =\n", dok)

dok =
   (1, 2)	5.0
  (0, 0)	1.0
  (4, 1)	2.0
  (3, 1)	2.0
  (0, 4)	9.0


In [7]:
# If your data is organized into
# row/col/value lists, it is easier
# to build a COO matrix: the three lists are handed over in one call.
# The shape is passed explicitly so that it matches the 5x5 DOK matrix
# built earlier instead of being inferred from the largest row/column
# index that happens to appear in the lists.
coo = sps.coo_matrix((data, (rows, cols)), shape=(5, 5), dtype=float)

# Q: Notice that the print order is different for
#    coo and dok. Why is that the case?
#
# Q: Notice that index (1,2) appears twice in the coo
#    matrix but not the dok matrix. Why is that the case?
print("coo =\n", coo)

coo =
   (0, 4)	9.0
  (1, 2)	4.0
  (1, 2)	5.0
  (0, 0)	1.0
  (4, 1)	2.0
  (3, 1)	2.0


In [12]:
# A sparse matrix that was assembled in one of the formats
# supporting efficient modification can afterwards be converted
# to one of the formats supporting efficient operations.
csr = coo.tocsr()

# Q: What does the csr conversion do to the duplicate
#    entries in coo?
print("csr =\n", csr)

csr =
   (0, 0)	1.0
  (0, 4)	9.0
  (1, 2)	9.0
  (3, 1)	2.0
  (4, 1)	2.0


#### 1. What does the .nnz attribute store?

In [13]:
# Show the nnz attribute of the CSR matrix created from the COO data.
print("csr.nnz =", csr.nnz)

csr.nnz = 5


In [16]:
# todense() expands the sparse matrix back into a dense
# representation, with an explicit zero wherever nothing is stored.
print("csr.todense() =\n", csr.todense())

csr.todense() =
 [[ 1.  0.  0.  0.  9.]
 [ 0.  0.  9.  0.  0.]
 [ 0.  0.  0.  0.  0.]
 [ 0.  2.  0.  0.  0.]
 [ 0.  2.  0.  0.  0.]]


#### 2. Timing operations

In [None]:
# CSR and CSC matrices have different strengths
# Draw 1000 random integer indices in [0, 1000) for the rows and for the
# columns, plus 1000 standard-normal values; presumably these are the
# triplets for the matrices timed below (the timing statement is not
# filled in yet).
# NOTE(review): `rows` and `cols` rebind the names defined in the first
# cell, so the earlier DOK/COO cells no longer see the small example lists
# if they are re-run after this cell.
# NOTE(review): no random seed is set, so the generated data differs on
# every run.
rows = np.random.randint(0,1000,1000)
cols = np.random.randint(0,1000,1000)
vals = np.random.randn(1000)

%timeit 