## Observation: Creating a large dense matrix with NumPy raises a `MemoryError`

In [1]:
import numpy as np
# A dense 10**5 x 10**5 array has 10**10 entries. With np.zeros' default
# float64 dtype (8 bytes per entry) that is roughly 80 GB, which is why this
# allocation failed with MemoryError when the notebook was run.
A = np.zeros((10**5,10**5))

MemoryError: 

## Create LIL Sparse Matrix

### LIL = Row-based linked list sparse matrix 
https://docs.scipy.org/doc/scipy-0.18.1/reference/generated/scipy.sparse.lil_matrix.html

In [2]:
from scipy.sparse import lil_matrix
from scipy.sparse import csr_matrix

nrow=5
ncol=10
# LIL format supports cheap entry-by-entry assignment, so build the matrix here.
A = lil_matrix((nrow, ncol), dtype=np.int8)

# Randomly choose K=chooseK *distinct* entries of A and set them equal to 1.
# Positions are sampled as flat indices without replacement, which guarantees
# exactly chooseK stored elements. Drawing row and column indices independently
# (with replacement) can pick the same (i, j) pair more than once.
# chooseK must not exceed nrow * ncol, otherwise np.random.choice raises ValueError.
chooseK=5
flat_positions = np.random.choice(nrow * ncol, chooseK, replace=False)
# Map each flat position back to its (row, column) coordinates.
rows, cols = np.unravel_index(flat_positions, (nrow, ncol))
for (i,j) in zip(rows, cols):
    A[i,j]=1

In [3]:
# Display A's repr: shape, dtype, number of stored elements and storage format.
A

<5x10 sparse matrix of type '<type 'numpy.int8'>'
	with 5 stored elements in LInked List format>

In [4]:
# Convert A to a regular (dense) matrix.
# Only do this for small matrices: the dense form stores every zero explicitly.
dense_A = A.todense()
# A parenthesised single-argument print gives the same output on Python 2 and 3.
print(dense_A)

[[0 0 0 0 0 0 0 1 0 0]
 [0 0 0 0 0 0 0 0 1 0]
 [1 0 0 0 0 0 0 0 0 0]
 [0 0 1 0 0 0 0 0 0 0]
 [0 1 0 0 0 0 0 0 0 0]]


In [5]:
# Convert to CSR (or CSC) format for fast arithmetic and matrix-vector
# operations. Note that this rebinds A: from this cell on, A is a CSR matrix.
A = A.tocsr()
A

<5x10 sparse matrix of type '<type 'numpy.int8'>'
	with 5 stored elements in Compressed Sparse Row format>

In [6]:
# Compute the sum of each row (axis=1) once and reuse it for both prints.
row_sums = A.sum(axis=1)
print(row_sums)
# Format into a single string so the line prints identically on Python 2 and 3.
print("Shape: {}".format(row_sums.shape))

[[1]
 [1]
 [1]
 [1]
 [1]]
Shape: (5, 1)


In [7]:
# Save and Load sparse matrices
##  http://stackoverflow.com/a/8980156
def save_sparse_csr(filename, array):
    """Store the CSR components of ``array`` in an ``.npz`` archive.

    Four entries are written with ``np.savez``: ``data``, ``indices``,
    ``indptr`` and ``shape``, each taken from the attribute of the same name
    on ``array``.

    Parameters
    ----------
    filename : str or file-like
        Destination handed straight to ``np.savez``.
    array : sparse matrix exposing ``data``, ``indices``, ``indptr``, ``shape``
        The matrix to persist (a CSR matrix in this notebook).
    """
    components = {
        'data': array.data,
        'indices': array.indices,
        'indptr': array.indptr,
        'shape': array.shape,
    }
    np.savez(filename, **components)

def load_sparse_csr(filename):
    """Rebuild a CSR matrix from an archive written by ``save_sparse_csr``.

    Parameters
    ----------
    filename : str or file-like
        Archive to read; passed straight to ``np.load``. It must contain the
        entries ``data``, ``indices``, ``indptr`` and ``shape``.

    Returns
    -------
    scipy.sparse.csr_matrix
        Matrix assembled from the stored components.

    Raises
    ------
    KeyError
        If one of the four expected entries is missing from the archive.
    """
    # np.load returns an NpzFile that keeps the archive open. Reading inside a
    # ``with`` block closes the file handle as soon as the arrays are loaded.
    with np.load(filename) as loader:
        data = loader['data']
        indices = loader['indices']
        indptr = loader['indptr']
        # Stored as an array; hand csr_matrix a plain tuple of Python ints.
        shape = tuple(int(dim) for dim in loader['shape'])
    return csr_matrix((data, indices, indptr), shape=shape)

In [8]:
# np.savez appends ".npz" when the name lacks it, so this writes "sparse_matrix.npz".
save_sparse_csr("sparse_matrix",A)

In [9]:
# Read the archive back; np.load does not add the ".npz" extension, so spell it out.
B=load_sparse_csr("sparse_matrix.npz")

In [10]:
# Display B: its repr should match A's (same shape, dtype, stored-element count, CSR format).
B

<5x10 sparse matrix of type '<type 'numpy.int8'>'
	with 5 stored elements in Compressed Sparse Row format>