# NumPy

This notebook presents [NumPy](https://numpy.org), the fundamental package for scientific computing in Python (see also [SciPy](https://docs.scipy.org/doc/scipy/reference/index.html)).<br> NumPy provides very fast operations on vectors and multi-dimensional arrays.

The outline is the following:
* Arrays
* Copy
* Shaping
* Concatenation
* Indexing & slicing
* Splitting
* Operations
* Broadcasting
* Boolean masks
* Advanced indexing

For further reference, check the [Python Data Science Handbook](https://jakevdp.github.io/PythonDataScienceHandbook/index.html).

In [None]:
import numpy as np

## Arrays

In [None]:
# from a list
a = np.array([0, 4, 2])
b = np.array([1, 2, 5.])
c = np.array(["albert", "barbara", "carol"])

In [None]:
a

In [None]:
len(a)

In [None]:
print(a.dtype)

In [None]:
b

In [None]:
print(b.dtype)

In [None]:
c

In [None]:
print(c.dtype)

In [None]:
# be careful with arrays of strings!
# a NumPy string array has a fixed width chosen at creation (by default the
# length of its longest initial string), so a longer value is silently truncated
c[0] = "abracadabra"

In [None]:
c

In [None]:
c[0]

In [None]:
# specify type: '<U32' stores unicode strings of up to 32 characters,
# which leaves enough room for longer values assigned later
c = np.array(["albert", "barbara", "carol"], dtype='<U32')

In [None]:
c[0] = "abracadabra"

In [None]:
c

In [None]:
# mixing types
d = np.array([1., 'albert', [2, 3, 4]], dtype=object)

In [None]:
d

In [None]:
for element in d:
    print(type(element))

In [None]:
# init arrays
a = np.zeros(10)
b = np.ones(5)
c = -np.ones(3)

In [None]:
a

In [None]:
b

In [None]:
c

In [None]:
# specify type
a = np.zeros(10, dtype=int)

In [None]:
a

In [None]:
a.dtype

In [None]:
# bytes
a = np.zeros(10, dtype=np.uint8)

In [None]:
# a uint8 entry only holds values in [0, 255]; since NumPy 2.0, assigning an
# out-of-range Python integer raises OverflowError, so the wrap-around
# (modulo 256) is obtained through an explicit cast
a[0] = np.array(1000).astype(np.uint8)

In [None]:
a

In [None]:
1000 % 256

In [None]:
# memory in bytes
a.nbytes

In [None]:
b = np.zeros(10, dtype=int)

In [None]:
b.nbytes

In [None]:
# 2-D, 3-D, ...: the shape is given as a tuple, one entry per dimension
a = np.zeros(shape=(3, 4))
b = np.ones(shape=(3, 4, 5))
# four dimensions of size 2 each
c = np.zeros(shape=(2,) * 4)

In [None]:
print(a)

In [None]:
a.shape

In [None]:
a.ndim

In [None]:
a.size

In [None]:
print(b)

In [None]:
b.shape

In [None]:
b.ndim

In [None]:
b.size

In [None]:
print(c)

In [None]:
c.shape

In [None]:
c.ndim

In [None]:
c.size

In [None]:
# vectors are 1-D arrays
a = np.zeros(4)
a.shape

In [None]:
a.ndim

In [None]:
# note the difference!
b = np.zeros((1, 4))

In [None]:
b

In [None]:
b.shape

In [None]:
b = np.zeros((4, 1))

In [None]:
b

In [None]:
b.shape

In [None]:
# empty array
a = np.array([])
a.shape

In [None]:
len(a)

In [None]:
a.ndim

In [None]:
b = np.ones(0)
b.shape

In [None]:
len(b)

In [None]:
b.ndim

In [None]:
b = np.ones((0, 0))
b.shape

In [None]:
len(b)

In [None]:
b.ndim

In [None]:
# from a list of lists
a = np.array([[3, 1, 4], [1, 5, 9]])

In [None]:
a

In [None]:
len(a)

In [None]:
len(np.zeros((4, 5)))

In [None]:
# range of integers: np.arange(start, stop, step), where stop is excluded
a = np.arange(10)  # 0, 1, ..., 9
b = np.arange(5, 10)  # 5, 6, ..., 9
c = np.arange(5, 15, 2)  # 5, 7, ..., 13
d = np.arange(10, 0, -1)  # 10, 9, ..., 1

In [None]:
a

In [None]:
b

In [None]:
c

In [None]:
d

In [None]:
# linear spacing: 10 evenly spaced values from 0 to 1, both ends included
a = np.linspace(0, 1, 10)

In [None]:
a

In [None]:
len(a)

In [None]:
# Exercise: Build a matrix of size 4 x 5 whose entry (i, j) is i * (j+1)

## Copy

In [None]:
# assignment: b is just another name for the same array (no data is copied)
a = np.arange(10)
b = a

In [None]:
b

In [None]:
a[0] = -1

In [None]:
b

In [None]:
# copy
a = np.arange(10)
b = a.copy()

In [None]:
b

In [None]:
a[0] = -1

In [None]:
b

In [None]:
# Exercise: What is a[0]?
a = np.ones(5)
b = a
c = a.copy()
b[0] += 1
c[0] += 1
a[0] += c[0]

## Shaping

In [None]:
# from 1D to 2D
a = np.arange(1, 10)
b = a.reshape(3, 3)
print(b)

In [None]:
shape = (3, 3)
c = a.reshape(shape)
print(c)

In [None]:
d = a.reshape(3, -1)
print(d)

In [None]:
# from 2D to 1D
print(b)

In [None]:
b.reshape(9,)

In [None]:
b.reshape(-1,)

In [None]:
b.flatten()

In [None]:
# from 1D to 3D
a = np.arange(24)
b = a.reshape(3, 2, 4)

In [None]:
print(b)

In [None]:
a = np.arange(24)
c = a.reshape(3, 2, -1)

In [None]:
print(c)

In [None]:
a = np.arange(24)
d = a.reshape(3, -1, 4)

In [None]:
print(d)

In [None]:
# from 2D to 2D
a = np.arange(10)
b = a.reshape(2, 5)

In [None]:
print(b)

In [None]:
c = b.reshape(5, 2)

In [None]:
print(c)

In [None]:
# from 1D to 2D (single row or column)
a = np.ones(4, dtype=int)

In [None]:
b = a.reshape(1, 4)
b.shape

In [None]:
b = a.reshape(1, -1)
b.shape

In [None]:
b = a.reshape(-1, 1)
b.shape

In [None]:
# new axis
a = np.ones(4)
a.shape

In [None]:
b = a[:, np.newaxis]
b.shape

In [None]:
b = a[np.newaxis, :]
b.shape

In [None]:
# more compact
b = a[np.newaxis]
b.shape

In [None]:
# adding multiple axes
a = np.ones((4, 3))
a.shape

In [None]:
b = a[np.newaxis, :, np.newaxis, :]
b.shape

In [None]:
# from 1D to 1D
a = np.arange(5)
print(a)

In [None]:
np.insert(a, 2, -1)

In [None]:
np.delete(a, 3)

In [None]:
# from 2D to 2D
a = np.arange(10).reshape(2, -1)
print(a)

In [None]:
np.insert(a, 1, -1, axis=0)

In [None]:
np.insert(a, 3, -1, axis=1)

In [None]:
np.delete(a, 3, axis=1)

In [None]:
# Exercise: What is a[1, 1, 1]?
a = np.arange(8).reshape(2, 2, -1)
print(a)

## Concatenation

In [None]:
# vectors
a = np.ones(4)
b = np.zeros(5)
c = np.arange(6)

In [None]:
np.concatenate([a, b])

In [None]:
np.concatenate([a, b, c])

In [None]:
# 2D arrays
a = np.ones((3, 5))
b = np.zeros((2, 5))
c = np.ones((3, 5))
d = np.zeros((3, 4))

In [None]:
# by default, concatenation is along the first axis
np.concatenate([a, b])

In [None]:
np.concatenate([a, b, c])

In [None]:
# equivalent
np.vstack([a, b, c])

In [None]:
# concatenation along second axis
np.concatenate([a, d], axis=1)

In [None]:
np.concatenate([a, d, c], axis=1)

In [None]:
# equivalent
np.hstack([a, d, c])

In [None]:
# Mix of 1D and 2D
a = np.ones((3, 5))
b = np.arange(5)
c = np.zeros((2, 5))

In [None]:
np.vstack([a, b])

In [None]:
np.vstack([a, b, c])

In [None]:
a = np.ones((3, 4))
b = np.arange(3)
c = np.zeros((3, 5))

In [None]:
# need to reshape the vector for horizontal stack!
np.hstack([a, b.reshape(3, 1)])

In [None]:
# Exercise: Build a matrix of size 3 x 5 whose columns are all [2, 5, 8].

## Indexing & slicing

In [None]:
# vector
a = np.arange(10)
print(a)

In [None]:
# indexing
for i in [1, -1, -2]:
    print(a[i])

In [None]:
# slicing
for i in [0, 1, -1, -2, 10, 20, -20]:
    print(a[i:])

In [None]:
for i in [0, 1, -1, -2, 10, 20, -20]:
    print(a[:i])

In [None]:
for i, j in [(2, 5), (5, 2), (0, -1), (4, None)]:
    print(a[i:j])

In [None]:
a[1:8:2]

In [None]:
a[1::2]

In [None]:
a[:8:2]

In [None]:
a[::2]

In [None]:
a[::-1]

In [None]:
a[::-2]

In [None]:
a[5:2:-2]

In [None]:
a[:2:-2]

In [None]:
# 2D array
a = np.arange(20).reshape(4, -1)

In [None]:
print(a)

In [None]:
# indexing
for i, j in [(0, 0), (1, 3), (-1, 0), (-1, -1), (-2, -2)]:
    print(a[i, j])

In [None]:
# row indexing
a[1]

In [None]:
# column indexing
a[:, 2]

In [None]:
# row slicing
a[1:4]

In [None]:
# column slicing
a[:, 2:4]

In [None]:
# subarray
a[1:3, 2:4]

In [None]:
# be careful with copy: a slice is a view on the original data,
# so modifying b also modifies a (use .copy() for an independent array)
b = a[1:3, 2:4]
b[0, 0] = -1
a

In [None]:
# Exercise: What is a[1::2, -2:]?
a = np.arange(16).reshape(4, -1)
print(a)

## Splitting

In [None]:
a = np.arange(20).reshape(4, -1)
print(a)

In [None]:
# vertical split 
np.split(a, 2)

In [None]:
np.split(a, [1, 3])

In [None]:
np.split(a, [3, 1])

In [None]:
# horizontal split 
np.split(a, [2], axis=1)

In [None]:
np.split(a, [1, 3], axis=1)

In [None]:
np.array_split(a, 3, axis=1)

## Operations

In [None]:
a = np.arange(1, 10)

In [None]:
b = 1 / a
c = a + 1
d = np.cos(a)
e = a * b

In [None]:
# test performance
a = np.arange(1, 10**6) 

In [None]:
%%time
# deliberately naive element-by-element Python loop, timed for comparison
# with the vectorized expression 1 / a
b = np.zeros(a.shape)
for i in range(len(a)):
    b[i] = 1 / a[i]

In [None]:
%%time
b = 1 / a

In [None]:
# aggregations: each function reduces the whole vector to a single value
a = np.arange(10)
print(a)
for aggregate in (np.sum, np.max, np.argmax, np.mean, np.std, np.median):
    result = aggregate(a)
    print(result)

In [None]:
# max vs maximum
a = np.arange(10)
b = np.flip(a)
np.maximum(a, b)

In [None]:
np.max(a)

In [None]:
# cumulative sum
print(a)
np.cumsum(a)

In [None]:
# equivalent
np.add.accumulate(a)

In [None]:
# same with maximum (running maximum)
# shuffle permutes a in place, so the output differs from run to run
np.random.shuffle(a)
print(a)
np.maximum.accumulate(a)

In [None]:
# specific ranges
print(a)
np.maximum.reduceat(a, [0, 3, 6])

In [None]:
# sliding window: sums of 3 consecutive values
# (the default mode 'full' also returns the partial windows at both ends,
# so b has len(a) + 2 entries; mode='valid' keeps complete windows only)
print(a)
b = np.convolve(a, [1, 1, 1])
print(b)

In [None]:
# count unique elements: returns the sorted unique values and their counts
# (a plain list is accepted as input)
a = [0, 0, 1, 1, 1, 3]
np.unique(a, return_counts=True)

In [None]:
# 2D
a = np.arange(20).reshape(4, 5)

In [None]:
b = a + 1
c = a * b
d = a.mean(axis=1)

In [None]:
# matrix-vector multiplication
u = np.ones(5)
a.dot(u)

In [None]:
# left multiplication
v = np.ones(4)
a.T.dot(v)

In [None]:
# matrix-matrix multiplication
b = np.arange(10).reshape(5, 2)
a.dot(b)

In [None]:
# left multiplication by reshaping
v[np.newaxis].dot(a)

In [None]:
# outer product
a = np.arange(4)
b = np.arange(5)
np.outer(a, b)

In [None]:
# Exercise: Using outer, build a matrix of size 3 x 5 whose columns are all [2, 5, 8].

## Broadcasting

In [None]:
# simple example
a = np.arange(5)
a + 5

In [None]:
# more interesting case
a = np.arange(5)
b = np.arange(3).reshape(-1, 1)
a + b

In [None]:
# centering along axis 0
a = np.arange(20) % 7
a = a.reshape(4, 5)
print(a)

In [None]:
a - a.mean(axis=0)

In [None]:
# centering along axis 1
a - a.mean(axis=1).reshape(-1, 1)

In [None]:
# Exercise: Using broadcasting, build a matrix of size 3 x 5 whose columns are all [2, 5, 8].

## Boolean masks

In [None]:
a = np.arange(5)
print(a)

In [None]:
# mask
b = a > 2
print(b)

In [None]:
# count
np.sum(a > 2)

In [None]:
# indices
np.where(a > 2)[0]

In [None]:
# comparison
np.all(a > 2)

In [None]:
b = 2 * a
np.all(b >= a)

In [None]:
np.all(a == b)

In [None]:
np.any(a > 2)

In [None]:
# extraction
a[a > 2]

In [None]:
b = a**2
b[a > 2]

In [None]:
# multiple conditions
a[(a > 0) & (b < 5)]

In [None]:
# negation
a[~((a > 0) & (b < 5))]

In [None]:
# 2D
a = np.arange(20).reshape(4, 5)
print(a)

In [None]:
a <= 7

In [None]:
a[a <= 7]

In [None]:
np.where(a <= 7)

In [None]:
# Exercise: Count the number of values in each interval [0, 0.5], [0.5, 0.75], [0.75, 1]
values = np.random.rand(100)
thresholds = np.array([0, 0.5, 0.75, 1])

## Advanced indexing

In [None]:
a = np.arange(1, 20, 2)
print(a)

In [None]:
b = [0, 3, 0, 2]

In [None]:
a[b]

In [None]:
# sequential assignment!
a[b] = np.arange(4)

In [None]:
a[b]

In [None]:
# 2D
a = np.arange(12).reshape(3, 4)
print(a)

In [None]:
# element-wise
row = [0, 1, 2]
col = [0, 1, 3]
a[row, col]

In [None]:
# sub-matrix
row = np.array([0, 2])
col = np.array([0, 1, 3])
a[row[:, np.newaxis], col]

In [None]:
# mask
# NOTE: each boolean mask is converted to the indices of its True entries and
# the two index arrays are then paired element-wise (with broadcasting), not
# combined as a cross product. This only works when the numbers of selected
# rows and columns are compatible (equal, or one of them is 1);
# use a[np.ix_(row, col)] to extract the sub-matrix in general.
row = a.sum(axis=1) < 20
col = a.sum(axis=0) > 12
a[row, col]

In [None]:
# mixing index and mask
row = np.array([0, 2])
col = np.sum(a, axis=0) > 12
a[row[:, np.newaxis], col]

In [None]:
# Exercise: Predict the word...
a = np.array(list("abracadabra"))
index = np.flatnonzero(a == "a")
following = index[:-1] + 1
word = ''.join(a[following])