In [1]:
from itertools import combinations
import numpy as np
from scipy import sparse
from sklearn.base import BaseEstimator, TransformerMixin

In [3]:
# 3x3 CSR matrix built from a dense nested list, one inner list per row.
A = sparse.csr_matrix(
    [
        [1, 2, 0],
        [0, 0, 3],
        [4, 0, 5],
    ]
)

In [5]:
# Dense 1-D vector that the sparse matrix A is multiplied with.
v = np.array([1, 0, -1])

In [6]:
# Sparse matrix times dense vector; the product comes back as a dense 1-D array.
A @ v

array([ 1, -3, -1], dtype=int64)

In [7]:
from scipy.sparse import lil_matrix
from scipy.sparse.linalg import spsolve
from numpy.linalg import solve, norm
from numpy.random import rand

In [8]:
# Seed NumPy's global RNG so the random matrix built here (and any rand()
# call that runs after this cell) is identical on every Restart & Run All.
np.random.seed(0)

# 1000x1000 sparse matrix assembled in LIL format, which supports item and
# slice assignment.
A = lil_matrix((1000, 1000))
# First 100 columns of row 0 get random values ...
A[0, :100] = rand(100)
# ... and the same values are copied into columns 100-199 of row 1.
A[1, 100:200] = A[0, :100]
# Random main diagonal; this overwrites the A[0, 0] entry written above.
A.setdiag(rand(1000))

In [9]:
# Convert A (built as a LIL matrix) to CSR format; the CSR version is what
# gets passed to spsolve.
A = A.tocsr()

In [10]:
# Random right-hand-side vector with one entry per row of the 1000x1000 matrix A.
b = rand(1000)

In [11]:
# Solve the linear system A x = b with the sparse solver.
x = spsolve(A, b)

In [12]:
# Solve the same system densely (A expanded to a full ndarray) as a reference.
x_ = solve(A.toarray(), b)

In [13]:
# Norm of the difference between the sparse solution x and the dense
# reference x_; the cell displays whether it is below the 1e-10 tolerance.
err = norm(x - x_)
err < 1e-10

True

In [14]:
from scipy import sparse
from numpy import array
# One (row, col, value) triple per stored entry, unzipped into the three
# parallel arrays that the COO constructor expects.
I, J, V = map(array, zip(
    (0, 0, 4),
    (3, 3, 5),
    (1, 1, 7),
    (0, 2, 9),
))
A = sparse.coo_matrix((V, (I, J)), shape=(4, 4))

In [16]:
# Dense view of the COO matrix A: each V[k] appears at position (I[k], J[k]).
A.toarray()

array([[4, 0, 9, 0],
       [0, 7, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 5]])

In [15]:
# Seven (row, col) coordinates, several of them repeated: (0, 0) occurs three
# times and (1, 1) twice. Every entry carries the value 1.
I, J = (array(axis) for axis in zip(
    (0, 0), (0, 2), (1, 1), (3, 3), (1, 1), (0, 0), (0, 0),
))
V = array([1] * len(I))
# Converting the COO matrix to CSR adds entries that share a coordinate.
B = sparse.coo_matrix(
    (V, (I, J)),
    shape=(4, 4),
).tocsr()

In [17]:
# Dense view of B: values at repeated (I, J) coordinates were added together.
B.toarray()

array([[3, 0, 1, 0],
       [0, 2, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 1]], dtype=int64)

In [18]:
from scipy.sparse import csc_matrix, isspmatrix_csc
# A csc_matrix instance is reported as CSC format.
isspmatrix_csc(csc_matrix([[5]]))

True

In [20]:
from scipy.sparse import csc_matrix, csr_matrix, isspmatrix_csc
# A csr_matrix is not CSC format, so this check is False.
isspmatrix_csc(csr_matrix([[5]]))

False

In [21]:
import numpy as np
from scipy.sparse import csc_matrix
# Empty 3x4 CSC matrix with int8 dtype; shown as a dense array it is all zeros.
csc_matrix((3, 4), dtype=np.int8).toarray()

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]], dtype=int8)

In [22]:
# (row, col, value) triples, unzipped into the three parallel arrays.
row, col, data = (
    np.array(axis)
    for axis in zip(
        (0, 0, 1), (2, 0, 2), (2, 1, 3),
        (0, 2, 4), (1, 2, 5), (2, 2, 6),
    )
)
# CSC matrix from coordinate input: data[k] lands at (row[k], col[k]).
csc_matrix((data, (row, col)), shape=(3, 3)).toarray()

array([[1, 0, 4],
       [0, 0, 5],
       [2, 3, 6]], dtype=int64)

In [23]:
# Native CSC triple, listed in the order the constructor takes it.
# For column j, the stored values are data[indptr[j]:indptr[j + 1]] and
# their row numbers are indices[indptr[j]:indptr[j + 1]].
data = np.array([1, 2, 3, 4, 5, 6])
indices = np.array([0, 2, 2, 0, 1, 2])
indptr = np.array([0, 2, 3, 6])
csc_matrix(
    (data, indices, indptr),
    shape=(3, 3),
).toarray()

array([[1, 0, 4],
       [0, 0, 5],
       [2, 3, 6]])

In [24]:
import numpy as np
from scipy.sparse import csr_matrix
# Empty 3x4 CSR matrix with int8 dtype; shown as a dense array it is all zeros.
csr_matrix((3, 4), dtype=np.int8).toarray()

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]], dtype=int8)

In [25]:
# (row, col, value) triples, unzipped into the three parallel arrays.
row, col, data = (
    np.array(axis)
    for axis in zip(
        (0, 0, 1), (0, 2, 2), (1, 2, 3),
        (2, 0, 4), (2, 1, 5), (2, 2, 6),
    )
)
# CSR matrix from coordinate input: data[k] lands at (row[k], col[k]).
csr_matrix((data, (row, col)), shape=(3, 3)).toarray()

array([[1, 0, 2],
       [0, 0, 3],
       [4, 5, 6]], dtype=int64)

In [26]:
# Native CSR triple, listed in the order the constructor takes it.
# For row i, the stored values are data[indptr[i]:indptr[i + 1]] and
# their column numbers are indices[indptr[i]:indptr[i + 1]].
data = np.array([1, 2, 3, 4, 5, 6])
indices = np.array([0, 2, 2, 0, 1, 2])
indptr = np.array([0, 2, 3, 6])
csr_matrix(
    (data, indices, indptr),
    shape=(3, 3),
).toarray()

array([[1, 0, 2],
       [0, 0, 3],
       [4, 5, 6]])

In [31]:
# Build a term-count matrix incrementally in CSR form, one row per document,
# printing the intermediate state after every step so the construction can
# be followed.
docs = [['hello', 'world', 'hello'], ['goodbye', 'cruel', 'world']]
vocabulary = {}   # term -> column number, assigned in order of first appearance
data = []         # one 1 per term occurrence
indices = []      # column number of each occurrence
indptr = [0]      # indptr[i]:indptr[i + 1] delimits document i's occurrences
for d in docs:
    for term in d:
        print(term, vocabulary)
        # A term seen for the first time gets the next free column number.
        if term not in vocabulary:
            vocabulary[term] = len(vocabulary)
        index = vocabulary[term]
        print(index)
        indices.append(index)
        print(indices)
        data.append(1)
        print(data)
    # Close the current row: everything appended so far belongs to rows <= this one.
    indptr.append(len(indices))
    print(indptr)

# Repeated column numbers inside a row are added up, giving per-document counts.
csr_matrix((data, indices, indptr), dtype=int).toarray()

hello {}
0
[0]
[1]
world {'hello': 0}
1
[0, 1]
[1, 1]
hello {'hello': 0, 'world': 1}
0
[0, 1, 0]
[1, 1, 1]
[0, 3]
goodbye {'hello': 0, 'world': 1}
2
[0, 1, 0, 2]
[1, 1, 1, 1]
cruel {'hello': 0, 'world': 1, 'goodbye': 2}
3
[0, 1, 0, 2, 3]
[1, 1, 1, 1, 1]
world {'hello': 0, 'world': 1, 'goodbye': 2, 'cruel': 3}
1
[0, 1, 0, 2, 3, 1]
[1, 1, 1, 1, 1, 1]
[0, 3, 6]


array([[2, 1, 0, 0],
       [0, 1, 1, 1]])