# Chapter 1

This chapter is all about data structures in NumPy, plus sparse matrices from SciPy.

In [29]:
import numpy as np

In [30]:
# Three basic containers: a 1-D vector, a 3x1 column vector and a 3x2 matrix.
vector_row = np.array([1, 2, 3])
vector_col = np.array([[1], [2], [3]])
matrix = np.array([[1, 2]] * 3)  # three identical rows

# Show each array in its constructor-style representation, one per line.
print(*map(np.array_repr, (vector_row, vector_col, matrix)), sep="\n")

array([1, 2, 3])
array([[1],
       [2],
       [3]])
array([[1, 2],
       [1, 2],
       [1, 2]])


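There is also `np.matrix([[1, 2], [1, 2]])` (previously also reachable as `np.mat`, an alias that was removed in NumPy 2.0 in favour of `np.asmatrix`), which would create a dedicated matrix data structure. Plain arrays are nevertheless the de facto standard: the NumPy documentation no longer recommends the matrix class, even for linear algebra, and notes that it may be removed in the future. See the [NumPy matrix documentation](https://numpy.org/doc/2.1/reference/generated/numpy.matrix.html).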

In [31]:
from scipy import sparse

In [32]:
# A small, mostly-zero matrix and its CSR (Compressed Sparse Row) counterpart.
some_matrix = np.array([[0, 0], [0, 1], [3, 0]])
some_matrix_sparse = sparse.csr_matrix(some_matrix)

# The "large" matrix is the same data with eight extra all-zero columns
# appended on the right, so it holds exactly the same two non-zero entries.
matrix_large = np.pad(some_matrix, ((0, 0), (0, 8)))
matrix_large_sparse = sparse.csr_matrix(matrix_large)

print(some_matrix_sparse, matrix_large_sparse, sep="\n")

<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 2 stored elements and shape (3, 2)>
  Coords	Values
  (1, 1)	1
  (2, 0)	3
<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 2 stored elements and shape (3, 10)>
  Coords	Values
  (1, 1)	1
  (2, 0)	3


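It is interesting to note that printing a sparse matrix also reports its shape. The stored coordinates and values are identical for both matrices here; only the reported shape differs, because the extra all-zero columns add no stored elements.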

In [33]:
# A 1-D integer vector of five zeros.
zeroes = np.zeros(5, dtype=int)
# NOTE: despite its name, `ones` is a 10x10 array filled with 1.337.
ones = np.full((10, 10), 1.337)
# The 1-D input becomes a single-row sparse matrix with nothing stored.
zeroes_sparse = sparse.csr_matrix(zeroes)

print(zeroes, ones, zeroes_sparse, sep="\n")

[0 0 0 0 0]
[[1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337]
 [1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337]
 [1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337]
 [1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337]
 [1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337]
 [1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337]
 [1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337]
 [1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337]
 [1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337]
 [1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337 1.337]]
<Compressed Sparse Row sparse matrix of dtype 'int64'
	with 0 stored elements and shape (1, 5)>


In [34]:
# Indices are zero-based: this is the fourth element of the 1-D array.
print(zeroes[3])
# A single index on a 2-D array selects a whole row (here the third one).
print(some_matrix[2])
# Negative indices count from the end: the second-to-last row.
print(some_matrix[-2])
# Slice from row index 1 through the last row.
print(some_matrix[1:])
# A slice stop beyond the number of rows is clipped, so every row is returned.
print(some_matrix[:10000])

0
[3 0]
[0 1]
[[0 1]
 [3 0]]
[[0 0]
 [0 1]
 [3 0]]


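Python is very interesting regarding slicing of arrays, element selection, and so on! Indices are zero-based, negative indices count from the end, and slice bounds that run past the end of the array (such as `[:10000]`) are clipped instead of raising an error.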

In [35]:
def csr_nbytes(csr):
    """Return the number of bytes used by the arrays backing a CSR matrix.

    Args:
        csr: A SciPy CSR matrix exposing ``data``, ``indices`` and ``indptr``.

    Returns:
        The summed ``nbytes`` of the ``data``, ``indices`` and ``indptr`` arrays.
    """
    return csr.data.nbytes + csr.indices.nbytes + csr.indptr.nbytes


# A fully populated 3x4 matrix and its CSR counterpart.
another_matrix = np.array([[1, 2, 3, 4],
                           [5, 6, 7, 8],
                           [9, 10, 11, 12]])

another_matrix_sparse = sparse.csr_matrix(another_matrix)

print(another_matrix.shape)
print(another_matrix.size)
print(another_matrix.ndim)
print(another_matrix.nbytes)
# With no zeros to skip, CSR stores every value plus its index arrays.
print(csr_nbytes(another_matrix_sparse))

# Same sparsely populated matrix, two storage formats. The labels say which
# number belongs to which format (they were previously identical).
print(f"Sparsely populated matrix size (dense ndarray): {matrix_large.nbytes}")
print(f"Sparsely populated matrix size (CSR): {csr_nbytes(matrix_large_sparse)}")

(3, 4)
12
2
96
160
Actual sparsly populated matrix size: 240
Actual sparsly populated matrix size: 40


In [36]:
def add_100(i):
    """Return ``i`` increased by 100."""
    offset = 100
    return i + offset


# Variant 1: wrap the scalar function so it is applied to every element.
vectorized_add_100 = np.vectorize(add_100)
another_matrix_plus_100 = vectorized_add_100(another_matrix)

# Variant 2: let broadcasting stretch the scalar 100 across the whole array.
broadcasting_add_100_to_another_matrix = another_matrix + 100

# `*` on two arrays multiplies element by element (the Hadamard product),
# it is not a matrix product.
hadamard = another_matrix * another_matrix

print(another_matrix_plus_100)
print(broadcasting_add_100_to_another_matrix)
print(hadamard)

[[101 102 103 104]
 [105 106 107 108]
 [109 110 111 112]]
[[101 102 103 104]
 [105 106 107 108]
 [109 110 111 112]]
[[  1   4   9  16]
 [ 25  36  49  64]
 [ 81 100 121 144]]


In [42]:
# Overall extremes. Named min_value/max_value (not min/max) so that Python's
# built-in min() and max() stay usable for the rest of the kernel session.
min_value = np.min(another_matrix)
max_value = np.max(another_matrix)

# axis=0 reduces over the rows, leaving one result per column.
min_col = np.min(another_matrix, axis=0)
max_col = np.max(another_matrix, axis=0)

# axis=1 reduces over the columns, leaving one result per row.
min_row = np.min(another_matrix, axis=1)
max_row = np.max(another_matrix, axis=1)

print(f"min:{min_value}, max:{max_value}")
print(f"min col:{min_col}, max col:{max_col}")
print(f"min row:{min_row}, max row:{max_row}")

min:1, max:12
min col:[1 2 3 4], max col:[ 9 10 11 12]
min row:[1 5 9], max row:[ 4  8 12]
