## Numpy Tutorial
https://numpy.org/devdocs/user/quickstart.html 


### Basics
- scientific computation in python
- multidim arr object
- functions for fast operations on arrays: math, logical, shape manipulation, sorting, Fourier transforms, linear algebra, statistics, etc

**Differences to lists**
- ndarray has a fixed size at creation; changing its size creates a new array (the elements themselves can still be modified in place)
- ndarray requires all items of the same type
- efficient math operations on large amounts of data


**Why is it fast**
- Pre compiled C code
- Vectorization: no explicit looping or indexing in the Python code
- Broadcasting: operations implicitly behave element-by-element, even between arrays of different (but compatible) shapes; check [algebra.ipynb](algebra.ipynb)

In [2]:
import numpy as np
# manage multidimensional arrays properly on python

In [4]:
# the array class is np.ndarray; np.array is the function normally used to create one

In [25]:
def details(arr: np.ndarray) -> None:
    """Print an array followed by its basic attributes on a single line.

    The values are printed in this order, separated by spaces: the array
    itself, ``shape``, ``ndim``, ``size``, ``dtype``, ``itemsize`` and the
    Python type of the object.

    Args:
        arr: The array to describe.
    """
    print(
        arr,
        arr.shape,
        arr.ndim,
        arr.size,  # total number of elements
        arr.dtype,  # type of the elements
        arr.itemsize,  # size in bytes of each element
        type(arr),
    )

In [26]:
details(np.array([1,2,3]))

[1 2 3] (3,) 1 3 int64 8 <class 'numpy.ndarray'>


In [27]:
details(np.array([1,2,3.0]))

[1. 2. 3.] (3,) 1 3 float64 8 <class 'numpy.ndarray'>


In [28]:
details(np.array([1,2,3.0, "s"]))

['1' '2' '3.0' 's'] (4,) 1 4 <U32 128 <class 'numpy.ndarray'>


In [29]:
details(np.array([[1,2,3.0], ["s"]]))
# non homogenous dimensions

ValueError: setting an array element with a sequence. The requested array has an inhomogeneous shape after 1 dimensions. The detected shape was (2,) + inhomogeneous part.

In [30]:
details(np.array([[1,2,3.0], ["s",2, 3]]))

[['1' '2' '3.0']
 ['s' '2' '3']] (2, 3) 2 6 <U32 128 <class 'numpy.ndarray'>


In [33]:
# Creating an array

# from a list
arr1= np.array([1,2,3])
arr2 = np.array([1,2,3], dtype=float) # set type when creating
arr3 = np.array([1,2,3], dtype=complex)
details(arr2)
details(arr3)

[1. 2. 3.] (3,) 1 3 float64 8 <class 'numpy.ndarray'>
[1.+0.j 2.+0.j 3.+0.j] (3,) 1 3 complex128 16 <class 'numpy.ndarray'>


In [34]:
# from zeros

np.zeros((3,4)) # receives a shape as input

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [37]:
details(np.ones((2,3,4), dtype=np.float16))

[[[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]] (2, 3, 4) 3 24 float16 2 <class 'numpy.ndarray'>


In [40]:
# np.empty allocates the array without initializing its values, which makes it faster;
# the contents are whatever was already in memory (in the recorded output it happens to show ones).
details(np.empty((2,3,4), dtype=np.float16))

[[[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]

 [[1. 1. 1. 1.]
  [1. 1. 1. 1.]
  [1. 1. 1. 1.]]] (2, 3, 4) 3 24 float16 2 <class 'numpy.ndarray'>


In [48]:
# sequences of numbers: np.arange works like Python's range but returns an array
print(np.arange(10, 30, 5)) # same as range()
print(np.arange(0.1, 2, 0.25)) # float arguments are accepted too

# With float arguments the number of elements arange returns is hard to predict because of
# finite floating-point precision; prefer linspace, which takes the element count directly.
print(np.linspace(0, 2, 5)) # start, stop (included by default), total number of elements


[10 15 20 25]
[0.1  0.35 0.6  0.85 1.1  1.35 1.6  1.85]
[0.  0.5 1.  1.5 2. ]


In [56]:
# printing arrays
# - the last axis is printed from left to right
# - the second-to-last axis is printed from top to bottom
# - the remaining axes are also printed from top to bottom, each slice separated by an empty line

print(np.zeros((1, 2, 3)))
print("-----")
print(np.ones((2, 2, 3)))

[[[0. 0. 0.]
  [0. 0. 0.]]]
-----
[[[1. 1. 1.]
  [1. 1. 1.]]

 [[1. 1. 1.]
  [1. 1. 1.]]]


In [64]:
# init with arange and reshape
arr = np.arange(2, 110, 3)
details(arr)

[  2   5   8  11  14  17  20  23  26  29  32  35  38  41  44  47  50  53
  56  59  62  65  68  71  74  77  80  83  86  89  92  95  98 101 104 107] (36,) 1 36 int64 8 <class 'numpy.ndarray'>


In [66]:
arr.reshape(6,5) # 36 items cannot fill a 6x5 (= 30 element) shape, so this raises ValueError

ValueError: cannot reshape array of size 36 into shape (6,5)

In [72]:
# print(arr.reshape(6,6))
# print(arr.reshape(6,3,2))
print(arr.reshape(6, -1)) # -1 to let numpy figure out the correct shape

[[  2   5   8  11  14  17]
 [ 20  23  26  29  32  35]
 [ 38  41  44  47  50  53]
 [ 56  59  62  65  68  71]
 [ 74  77  80  83  86  89]
 [ 92  95  98 101 104 107]]


### Operations

In [78]:
# Arithmetic ops on arr, apply elementwise
a = np.array([5,10,15])
b = np.arange(3)
print(a,b)
print(a-b)
print(b**2)
print(5 * np.sin(a)) # apply sin element wise, then * scalar (5)


[ 5 10 15] [0 1 2]
[ 5  9 13]
[0 1 4]
[-4.79462137 -2.72010555  3.2514392 ]


In [81]:
# * -> elementwise
# @ -> matrix product

print(a @ b) # dot product
print(a*b) # element wise

40
[ 0 10 30]


In [94]:
# matrix
A = np.arange(4).reshape(2,2) # (2, 2)
b = np.arange(5,7,1) # (2,)
print(A)
print(b)

[[0 1]
 [2 3]]
[5 6]


In [95]:
print(A*b) # elementwise; b is broadcast across the rows of A
print(A@b) # matrix-vector product
A.dot(b) # same as @ here; the result is discarded (not printed, not the last expression)
# b @ A is allowed: matmul promotes the 1-D b to shape (1, 2), multiplies, then drops
# the added axis again. This is matmul's rule for 1-D operands rather than broadcasting.
# see algebra.ipynb
print(b@A)


[[ 0  6]
 [10 18]]
[ 6 28]
[12 23]


In [99]:
# += and *= operate in place
rg = np.random.default_rng(1) # seed
print(rg)
a = np.ones((2,3), dtype=int)
b = rg.random((2,3))
print(a)
print(b)

Generator(PCG64)
[[1 1 1]
 [1 1 1]]
[[0.51182162 0.9504637  0.14415961]
 [0.94864945 0.31183145 0.42332645]]


In [101]:
a *= 3 # in place: a is modified and keeps its int dtype
# NOTE(review): the recorded output shows 9s rather than 3s; presumably this cell was run twice (1 -> 3 -> 9)
print(a)

[[9 9 9]
 [9 9 9]]


In [102]:
b += a
print(b)

[[9.51182162 9.9504637  9.14415961]
 [9.94864945 9.31183145 9.42332645]]


In [104]:
a += b # this fails because a=int, b=float

UFuncTypeError: Cannot cast ufunc 'add' output from dtype('float64') to dtype('int64') with casting rule 'same_kind'

In [106]:
# when operating on arrays of different dtypes, the result takes the more general (more precise) dtype
# this behavior is called upcasting

In [117]:
# unary operations are available as methods of the array
# print(a)
# print(a.sum())
# # can also do on 1 axis
# print(a.sum(axis=0))
# print(a.sum(axis=1))

print ('--------')

x = np.arange(0,16,1).reshape(1,4,4)
print(x)
print(x.sum()) # sum of all elements
print(x.sum(axis=0)) # axis 0 has length 1, so summing over it only drops that axis
print(x.sum(axis=1)) # collapses axis 1: the column sums of the single 4x4 slice
print(x.sum(axis=1).shape)

--------
[[[ 0  1  2  3]
  [ 4  5  6  7]
  [ 8  9 10 11]
  [12 13 14 15]]]
120
[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]
 [12 13 14 15]]
[[24 28 32 36]]
(1, 4)


### Indexing, slicing, iterating

One-dimensional arrays can be indexed, sliced and iterated over, much like Python lists.

In [120]:
a = np.arange(10)**3
a

array([  0,   1,   8,  27,  64, 125, 216, 343, 512, 729])

In [121]:
a[2:5]

array([ 8, 27, 64])

In [125]:
a[:6:2] # from index 0 up to (but not including) 6, every 2nd element

array([ 0,  8, 64])

In [128]:
a[:6:2] = 1000 # those items, equal x
a

array([1000,    1, 1000,   27, 1000,  125,  216,  343,  512,  729])

In [129]:
a[::-1] # reversed

array([ 729,  512,  343,  216,  125, 1000,   27, 1000,    1, 1000])

In [131]:
for i in a:
    print(i)

1000
1
1000
27
1000
125
216
343
512
729


In [136]:
M = np.arange(12).reshape(3, 4)
print(M)

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]


In [139]:
M[:2,], M[:,1], M[2,:]

(array([[0, 1, 2, 3],
        [4, 5, 6, 7]]),
 array([1, 5, 9]),
 array([ 8,  9, 10, 11]))

In [145]:
print(M[:,:]) # full slice on every dimension
print(M[1]) # row 1: when fewer indices than axes are given, the missing ones are treated as complete slices
print(M[1,...]) # ... stands for as many ':' as needed to complete the index

[[ 0  1  2  3]
 [ 4  5  6  7]
 [ 8  9 10 11]]
[4 5 6 7]
[4 5 6 7]


In [153]:
for row in M: # over row
    print(row, end=" -- ")
print()
for item in M.flat: # iterate over each item
    print(item, end=",")

[0 1 2 3] -- [4 5 6 7] -- [ 8  9 10 11] -- 
0,1,2,3,4,5,6,7,8,9,10,11,

### Shape Manipulation

In [155]:
a = np.floor(10 * rg.random((3,4)))
a, a.shape

(array([[2., 2., 7., 2.],
        [4., 9., 9., 7.],
        [5., 2., 1., 9.]]),
 (3, 4))

In [158]:
a.ravel() # returns the array flattened to 1-D ("ravel" ~ untangle)

array([2., 2., 7., 2., 4., 9., 9., 7., 5., 2., 1., 9.])

In [159]:
a.reshape(2,6) # returns a reshaped array and leaves a unchanged; result not shown (only the last expression is displayed)
a.T # transpose: shape (3, 4) -> (4, 3)

array([[2., 4., 5.],
       [2., 9., 2.],
       [7., 9., 1.],
       [2., 7., 9.]])

In [160]:
#### Stacking arrays

In [169]:
a = np.floor(10 * rg.random((2,2)))
b = np.floor(10 * rg.random((2,2)))
print(a)
print(b)

[[7. 2.]
 [8. 0.]]
[[8. 1.]
 [3. 3.]]


In [170]:
np.vstack([a,b])

array([[7., 2.],
       [8., 0.],
       [8., 1.],
       [3., 3.]])

In [171]:
np.hstack([a,b])

array([[7., 2., 8., 1.],
       [8., 0., 3., 3.]])

In [173]:
np.column_stack([a,b]) # same as `hstack` only for 2D

array([[7., 2., 8., 1.],
       [8., 0., 3., 3.]])

In [174]:
# vstack, hstack, column_stack, concatenate, c_, r_
# check differences, axis, etc

In [183]:
# splitting, vsplit (vertical axis), array_split allows to set axis
print(a)
print(np.vsplit(a, 1))
print(np.vsplit(a, 2))

[[7. 2.]
 [8. 0.]]
[array([[7., 2.],
       [8., 0.]])]
[array([[7., 2.]]), array([[8., 0.]])]


In [185]:
np.hsplit(a,2)

[array([[7.],
        [8.]]),
 array([[2.],
        [0.]])]

In [186]:
c = a.view() # new array object that looks at the same data as a (a view / shallow copy, not a data copy)
c is a # False: different objects, even though the data is shared

False

In [187]:
c.base is a # view of the data owned by a, which means, diff reference to the same content

True

In [188]:
a *= 2
a

array([[14.,  4.],
       [16.,  0.]])

In [189]:
c

array([[14.,  4.],
       [16.,  0.]])

In [192]:
d = c[:,1] # slicing returns a view: a new array object that still shares its data with c (and a)
d

array([4., 0.])

In [193]:
d = a.copy() # deep copy: a new array object with its own copy of the data

In [194]:
# https://numpy.org/devdocs/reference/routines.html#routines

### Not so basic
- Broadcasting rules, check algebra.ipynb
- arrays can be indexed in more complex ways

In [196]:
a = np.arange(12) **2
a

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100, 121])

In [198]:
i = np.array([1,1,3,8,5])
print(a[i]) # each i treated as an index

[ 1  1  9 64 25]


In [202]:
a1 = a.reshape(2,6 )
a1

array([[  0,   1,   4,   9,  16,  25],
       [ 36,  49,  64,  81, 100, 121]])

In [210]:
i = np.array([[0,1], [2,3]])
a[i]

array([[0, 1],
       [4, 9]])

In [212]:
i = np.array([0]) # retrieves by row
a1[i]

array([[ 0,  1,  4,  9, 16, 25]])

In [214]:
j = np.array([2,3]) # i, j retrieval from matrix
a1[i,j]

array([4, 9])

In [229]:
# NOTE(review): the recorded output shows four appended columns; presumably this cell was re-run several times
a1 = np.hstack([a1, np.array([[2], [3]])]) # append one column (2 on row 0, 3 on row 1)
print(a1)
print(a1.argmax()) # index of the max in the flattened array
print(a1.argmax(axis=0)) # for each column, the row index of its max
print(a1.argmax(axis=1)) # for each row, the column index of its max

[[  0   1   4   9  16  25   2   2   2   2]
 [ 36  49  64  81 100 121   3   3   3   3]]
15
[1 1 1 1 1 1 1 1 1 1]
[5 5]


In [234]:
### Indexing with booleans

a = np.arange(12).reshape(3,4)
b = a > 4
a[b]

array([ 5,  6,  7,  8,  9, 10, 11])

In [236]:
a[b] = 0
a # masking

array([[0, 1, 2, 3],
       [4, 0, 0, 0],
       [0, 0, 0, 0]])

### Tricks and Tips

In [242]:
# auto reshaping
a = np.arange(30)
b = a.reshape(3, -1, 5) # -1 lets numpy infer that dimension: 30 / (3 * 5) = 2
b

array([[[ 0,  1,  2,  3,  4],
        [ 5,  6,  7,  8,  9]],

       [[10, 11, 12, 13, 14],
        [15, 16, 17, 18, 19]],

       [[20, 21, 22, 23, 24],
        [25, 26, 27, 28, 29]]])

In [243]:
# Vector stacking
# 2D array from equally sized row vectors
x = np.arange(0,10,2)
y = np.arange(5)
m = np.vstack([x,y])
m

array([[0, 2, 4, 6, 8],
       [0, 1, 2, 3, 4]])

In [244]:
xy = np.hstack([x,y])
xy

array([0, 2, 4, 6, 8, 0, 1, 2, 3, 4])