# NumPy

NumPy is a linear algebra library for Python. It is so important for data science with Python because almost all of the libraries in the PyData ecosystem rely on NumPy as one of their main building blocks.

NumPy is also incredibly fast, as it has bindings to C libraries.

[NumPy Documentation](https://numpy.org/index.html)

[NumPy Universal-Functions](https://numpy.org/doc/stable/reference/ufuncs.html)

NumPy arrays essentially come in two flavors: vectors and matrices.
Vectors are strictly 1-d arrays and matrices are 2-d (a matrix may still have only one row or one column).

In [2]:
import numpy as np

## 1. NumPy Arrays

1d-Array

In [9]:
# Build a 1-d array from a plain Python list.
my_list = [1, 2, 3]
arr = np.array(my_list)
arr

array([1, 2, 3])

2d-Array

In [12]:
# A list of three equal-length lists: the raw material for a 2-d array.
my_mat = [[1, 2, 3], [4, 5, 6], [7, 8, 9]]
my_mat


[[1, 2, 3], [4, 5, 6], [7, 8, 9]]

In [13]:
# Nested lists become a 3x3 array: one inner list per row.
np.array(my_mat)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

10 items: from 0 to 9

In [4]:
# Start (0) is inclusive, stop (10) is exclusive: 0 through 9.
np.arange(0, 10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

from 0 up to (but not including) 11, with a step of 2

In [5]:
# Every second integer starting at 0; the stop value 11 is excluded, so the last element is 10.
np.arange(0, 11, 2)

array([ 0,  2,  4,  6,  8, 10])

Vector of zeros

In [15]:
np.zeros(4)


array([0., 0., 0., 0.])

Matrix of Zeros

In [6]:
np.zeros((2, 3))

array([[0., 0., 0.],
       [0., 0., 0.]])

1d-Array of Ones

In [7]:
np.ones(4)

array([1., 1., 1., 1.])

2d-Array of Ones

In [16]:
np.ones((2, 4))

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.]])

In [19]:
# 10 evenly spaced points from 0 to 9. Unlike arange, the end point is included
# and the result is a float array.
np.linspace(0, 9, 10)


array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

from 0 to 5, with 10 equidistant points

In [9]:
# Third argument is the number of points, not a step; both end points (0 and 5) are included.
np.linspace(0, 5, 10)

array([0.        , 0.55555556, 1.11111111, 1.66666667, 2.22222222,
       2.77777778, 3.33333333, 3.88888889, 4.44444444, 5.        ])

Identity-Matrix

In [10]:
# 5x5 matrix with ones on the diagonal and zeros everywhere else.
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

random uniform, between 0 and 1

In [27]:
# One argument -> 1-d array with that many samples.
# No seed is set in this notebook, so the values differ on every run.
np.random.rand(2)

array([0.00382474, 0.17301542])

In [29]:
# The dimensions are passed as separate arguments (not as a tuple): a 5x5 matrix of samples.
np.random.rand(5,5)

array([[0.87126575, 0.36287391, 0.22571123, 0.87525215, 0.18193932],
       [0.95492014, 0.43395551, 0.92216355, 0.7920452 , 0.11166635],
       [0.08422171, 0.95413687, 0.2196287 , 0.1880552 , 0.5487267 ],
       [0.29704637, 0.16874035, 0.4710729 , 0.46644397, 0.24600973],
       [0.64225566, 0.4337092 , 0.57231183, 0.23385921, 0.74480756]])

random normal distribution, centered around 0

In [25]:
# One argument -> 1-d array with that many samples (unseeded, so values change per run).
np.random.randn(2)

array([0.35240601, 0.87026009])

In [26]:
# Two arguments -> 5x5 matrix of samples; unlike rand, the values can be negative.
np.random.randn(5,5)

array([[-0.22477546,  1.38412044, -0.47558249, -0.85808202, -0.68806488],
       [ 1.20266958, -0.08559706,  1.24594874, -1.65529391, -2.05580005],
       [-0.34099223,  0.85020031,  0.25040391,  0.34109587, -1.2677251 ],
       [ 0.23159485, -1.74860307, -0.79383778,  0.48628116, -2.02949958],
       [-0.87731946, -1.18999933,  0.28687744, -1.11897398,  0.26755202]])

Random integers between a low and a high number:
low is inclusive, high is exclusive.

In [12]:
# Three random integers with 1 <= value < 100.
np.random.randint(1, 100, 3)

array([52,  9, 32])

Reshape

In [16]:
# 25 consecutive integers (0..24) in a flat, 1-d array.
arr = np.arange(25)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23, 24])

In [18]:
arr.shape

(25,)

In [21]:
# Rearrange the 25 elements into 5 rows x 5 columns (5 * 5 matches the 25 elements).
# The name arr is rebound to the reshaped array.
arr = arr.reshape(5, 5)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [22]:
arr.shape

(5, 5)

In [30]:
# Element type shared by all entries of the array (an integer type here).
arr.dtype

dtype('int64')

Max/Min

In [23]:
# Ten random integers with 0 <= value < 50.
# Unseeded: the max/min/argmax/argmin results in the following cells change on re-run.
ranarr = np.random.randint(0, 50, 10)
ranarr

array([34, 36, 29, 45,  8, 36, 33,  9, 18, 41])

In [24]:
ranarr.max()

45

In [25]:
ranarr.min()

8

Index

In [27]:
# Position (index) of the largest value, not the value itself.
ranarr.argmax()

3

In [28]:
# Position (index) of the smallest value, not the value itself.
ranarr.argmin()

4

### Reference


In [33]:
from numpy.random import randint

In [34]:
# With only low and high given, a single integer (2 <= value < 10) is returned instead of an array.
randint(2,10)

8

## 2. Array Indexing

In [35]:
# Integers 0..10: every value equals its own index, which makes the slices that follow easy to check.
arr = np.arange(0, 11)
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [31]:
# Single element at index 8 (indexing is zero-based).
arr[8]

8

In [32]:
# Slice: the start index (1) is included, the end index (5) is excluded.
arr[1:5]

array([1, 2, 3, 4])

In [33]:
# From index 5 (inclusive) through the end of the array.
arr[5:]

array([ 5,  6,  7,  8,  9, 10])

In [36]:
# First six elements: indices 0 through 5.
arr[0:6]

array([0, 1, 2, 3, 4, 5])

In [37]:
# Omitting the start is the same as starting at index 0.
arr[:6]

array([0, 1, 2, 3, 4, 5])

In [40]:
# Assigning a scalar to a slice sets every selected element (indices 0-4) in place.
arr[0:5] = 100
arr

array([100, 100, 100, 100, 100,   5,   6,   7,   8,   9,  10])

In [49]:
# Start again from a fresh 0..10 array after the in-place assignment in the previous cell.
arr = np.arange(0,11)

In [50]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [51]:
# Slicing does not copy the data here: as the next cell shows,
# writing to slice_of_arr also changes arr.
slice_of_arr = arr[0:6]
slice_of_arr

array([0, 1, 2, 3, 4, 5])

In [52]:
# [:] assigns to every element of the slice.
slice_of_arr[:] = 99
print(slice_of_arr)
# The first six elements of the original array have changed as well.
print(arr)

[99 99 99 99 99 99]
[99 99 99 99 99 99  6  7  8  9 10]


Avoid modifying the original array: work on an explicit copy

In [53]:
# Independent copy: the later write to arr_copy leaves arr untouched (see the prints that follow).
arr_copy = arr.copy()

In [54]:
arr_copy

array([99, 99, 99, 99, 99, 99,  6,  7,  8,  9, 10])

In [55]:
# Overwrite every element of the copy.
arr_copy[:] = 100

In [57]:
# Only the copy holds 100s ...
print(arr_copy)
# ... while arr still has the values it had before the copy was modified.
print(arr)

[100 100 100 100 100 100 100 100 100 100 100]
[99 99 99 99 99 99  6  7  8  9 10]


Index 1: Double bracket Notation

In [58]:
# 3x3 example matrix for the 2-d indexing examples that follow.
# NumPy is already imported as `np` in the first code cell, so it is not re-imported here.
arr_2d = np.array([[5, 10, 15], [20, 25, 30], [35, 40, 45]])
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [59]:
# A single index on a 2-d array selects a whole row (here row 0).
arr_2d[0]

array([ 5, 10, 15])

In [50]:
# Double-bracket form: first take row 1, then element 1 of that row.
arr_2d[1][1]

25

In [51]:
arr_2d[1][2]

30

Index 2: Comma Single Bracket Notation (Use this)

In [52]:
# Same element as arr_2d[1][1], addressed as [row, column] inside one bracket.
arr_2d[1, 1]

25

In [53]:
arr_2d[1, 2]

30

Chunks of matrices

In [54]:
# Rows 0-1 (stop 2 is exclusive), columns 1 to the end: the top-right 2x2 block.
arr_2d[:2, 1:]

array([[10, 15],
       [25, 30]])

In [60]:
arr_2d

array([[ 5, 10, 15],
       [20, 25, 30],
       [35, 40, 45]])

In [63]:
# Rows 1 to the end, columns 0-1: the bottom-left 2x2 block.
arr_2d[1:,:2]

array([[20, 25],
       [35, 40]])

In [79]:
# Only a row slice is given: every row except the last one, with all columns.
arr_2d[0:-1]

array([[ 5, 10, 15],
       [20, 25, 30]])

In [81]:
# 6x6 practice matrix for the sub-matrix slicing exercises that follow.
arr_big = np.array([[5, 10, 15, 20, 25, 30],
                    [20, 25, 30, 35, 40, 45],
                    [35, 40, 45, 50, 55, 60],
                    [50, 55, 60, 65, 70, 75],
                    [65, 70, 75, 80, 85, 90],
                    [80, 85, 90, 95, 100, 105]])
arr_big

array([[  5,  10,  15,  20,  25,  30],
       [ 20,  25,  30,  35,  40,  45],
       [ 35,  40,  45,  50,  55,  60],
       [ 50,  55,  60,  65,  70,  75],
       [ 65,  70,  75,  80,  85,  90],
       [ 80,  85,  90,  95, 100, 105]])

45, 50

60, 65

In [86]:
# Rows 2-3 and columns 2-3 (both stops are exclusive): the central 2x2 block.
arr_big[2:4, 2:4]

array([[45, 50],
       [60, 65]])

45  50  55  60

60  65  70  75

75  80  85  90

90  95 100 105

In [89]:
# Rows 2 to the end and columns 2 to the end: the bottom-right 4x4 block.
arr_big[2:,2:]

array([[ 45,  50,  55,  60],
       [ 60,  65,  70,  75],
       [ 75,  80,  85,  90],
       [ 90,  95, 100, 105]])

Conditional Selection

In [94]:
# Integers 1..10 (stop 11 is exclusive).
arr = np.arange(1, 11)
arr

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [97]:
# Comparing an array with a scalar works element by element and yields
# a boolean array of the same length.
bool_arr = arr > 5
bool_arr

array([False, False, False, False, False,  True,  True,  True,  True,
        True])

In [98]:
# Indexing with the boolean array keeps only the elements where it is True.
arr[bool_arr]

array([ 6,  7,  8,  9, 10])

In [58]:
# Same selection with the condition written inline instead of going through bool_arr.
arr[arr>5]

array([ 6,  7,  8,  9, 10])

In [99]:
# Elements strictly smaller than 3.
arr[arr<3]

array([1, 2])

In [100]:
# 0..49 laid out as 5 rows x 10 columns.
# Note: this rebinds arr_2d, replacing the 3x3 matrix used in the earlier indexing cells.
arr_2d = np.arange(50).reshape(5, 10)
arr_2d

array([[ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14, 15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24, 25, 26, 27, 28, 29],
       [30, 31, 32, 33, 34, 35, 36, 37, 38, 39],
       [40, 41, 42, 43, 44, 45, 46, 47, 48, 49]])

In [61]:
# Row 2, columns 1-5 (stop 6 is exclusive) -> [21 22 23 24 25]
arr_2d[2, 1:6]

array([21, 22, 23, 24, 25])

## 3. Operations

In [35]:
# Arithmetic between two arrays of equal length is applied element by element.
arr = np.arange(0, 11)
print(arr)
print(arr + arr)  # element-wise sum
print(arr * arr)  # element-wise product (the squares), not a dot product

[ 0  1  2  3  4  5  6  7  8  9 10]
[ 0  2  4  6  8 10 12 14 16 18 20]
[  0   1   4   9  16  25  36  49  64  81 100]


Array with Scalars: Basic arithmetic operations

In [36]:
# A scalar operand is applied to every element of the array.
print(5*arr)
print(100*arr)

[ 0  5 10 15 20 25 30 35 40 45 50]
[   0  100  200  300  400  500  600  700  800  900 1000]


In [101]:
# Adds 100 to every element.
# NOTE(review): the saved output (101..110, ten values) corresponds to arr = np.arange(1, 11)
# from the conditional-selection section, not to the 0..10 array defined at the start of this
# section. Re-run the notebook top to bottom to refresh it.
arr + 100

array([101, 102, 103, 104, 105, 106, 107, 108, 109, 110])

In [102]:
# Subtracts 100 from every element.
# NOTE(review): the saved output has ten values (-99..-90), i.e. it was produced with
# arr = np.arange(1, 11) rather than the 0..10 array defined at the start of this section.
arr - 100

array([-99, -98, -97, -96, -95, -94, -93, -92, -91, -90])

NumPy and Errors

In [39]:
# Plain Python raises ZeroDivisionError on division by zero. The exception is the
# demonstration, so catch it and print its message instead of letting the cell fail,
# which would abort a "Restart & Run All".
try:
    print(1/0)
except ZeroDivisionError as exc:
    zero_division_message = f"{type(exc).__name__}: {exc}"
    print(zero_division_message)

ZeroDivisionError: division by zero

In [38]:
# NumPy does not raise here: 0/0 (the first element) evaluates to nan, every other element to 1.
# (NumPy normally reports this with a RuntimeWarning rather than an exception.)
print(arr / arr)

[nan  1.  1.  1.  1.  1.  1.  1.  1.  1.  1.]


In [40]:
# Element-wise reciprocal; 1/0 for the first element becomes inf instead of raising.
1/arr

array([       inf, 1.        , 0.5       , 0.33333333, 0.25      ,
       0.2       , 0.16666667, 0.14285714, 0.125     , 0.11111111,
       0.1       ])

In [41]:
# Element-wise square (same values as arr * arr).
arr ** 2

array([  0,   1,   4,   9,  16,  25,  36,  49,  64,  81, 100], dtype=int32)

Universal Array Functions

In [42]:
# Element-wise square root; the result is a float array.
np.sqrt(arr)

array([0.        , 1.        , 1.41421356, 1.73205081, 2.        ,
       2.23606798, 2.44948974, 2.64575131, 2.82842712, 3.        ,
       3.16227766])

In [43]:
np.exp(arr)

array([1.00000000e+00, 2.71828183e+00, 7.38905610e+00, 2.00855369e+01,
       5.45981500e+01, 1.48413159e+02, 4.03428793e+02, 1.09663316e+03,
       2.98095799e+03, 8.10308393e+03, 2.20264658e+04])

In [44]:
np.max(arr) # Same as: arr.max()

10

In [45]:
np.sin(arr)

array([ 0.        ,  0.84147098,  0.90929743,  0.14112001, -0.7568025 ,
       -0.95892427, -0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849,
       -0.54402111])

In [46]:
# arr starts at 0, so the first result is log(0) = -inf rather than an exception.
np.log(arr)

array([      -inf, 0.        , 0.69314718, 1.09861229, 1.38629436,
       1.60943791, 1.79175947, 1.94591015, 2.07944154, 2.19722458,
       2.30258509])