# Python Data Science Handbook

## 2. Introduction to NumPy

In [1]:
import numpy
numpy.__version__

'1.21.5'

In [2]:
pip install numpy

Note: you may need to restart the kernel to use updated packages.


In [1]:
import numpy as np

### Understanding Data Types in Python

In [2]:
result = 0

for i in range(100):
    result += i

In [3]:
result

4950

In [4]:
x = 4
x = 'four'

### A Python List Is More Than Just a List

In [5]:
L = list(range(10))
L

[0, 1, 2, 3, 4, 5, 6, 7, 8, 9]

In [6]:
type(L[0])

int

In [7]:
L2 = [str(c) for c in L]
L2

['0', '1', '2', '3', '4', '5', '6', '7', '8', '9']

In [8]:
L3 = [True, '2', 3.0, 4]
[type(item) for item in L3]

[bool, str, float, int]

### Fixed-Type Arrays in Python

In [9]:
import array

L = list(range(10))
A = array.array('i', L)
A

array('i', [0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

### Creating Arrays from Python Lists

In [10]:
np.array([1, 4, 2, 5, 3])

array([1, 4, 2, 5, 3])

In [11]:
np.array([3.14, 4, 2, 3])

array([3.14, 4.  , 2.  , 3.  ])

In [12]:
np.array([1, 2, 3, 4], dtype=np.float32)

array([1., 2., 3., 4.], dtype=float32)

In [13]:
np.array([range(i, i + 3) for i in [2, 4, 6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

### Creating Arrays from Scratch

In [14]:
np.zeros(10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [15]:
np.ones((3, 5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [16]:
np.full((3, 5), 3.14)

array([[3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14],
       [3.14, 3.14, 3.14, 3.14, 3.14]])

In [17]:
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [18]:
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [19]:
np.random.random((3, 3))

array([[0.0130359 , 0.73943312, 0.57092601],
       [0.84326495, 0.47541131, 0.58369234],
       [0.7085162 , 0.65015696, 0.92867038]])

In [20]:
np.random.normal(0, 1, (3, 3))

array([[ 0.27597385,  0.50391515, -0.15495558],
       [ 0.83858179,  0.37855937, -0.85680297],
       [ 2.07891043, -0.94093269, -1.11152983]])

In [21]:
np.random.randint(0, 10, (3, 3))

array([[0, 8, 8],
       [5, 8, 5],
       [5, 2, 6]])

In [22]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [23]:
np.empty(3)

array([1., 1., 1.])

### NumPy Standard Data Types

In [24]:
np.zeros(10, dtype='int16')

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

In [25]:
np.zeros(10, dtype=np.int16)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int16)

In [26]:
np.zeros(10, dtype='uint64')

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=uint64)

### NumPy Array Attributes

In [27]:
# Fixed seed so the random arrays below are the same on every run.
rng = np.random.default_rng(seed=1701)

# NOTE: the order of these three draws matters -- each call advances the
# generator, so reordering them would change every array.
x1 = rng.integers(10, size=6)  # one-dimensional array
x2 = rng.integers(10, size=(3, 4))  # two-dimensional array
x3 = rng.integers(10, size=(3, 4, 5))  # three-dimensional array

# The extra '\n' arguments only add blank lines between the printed arrays.
print(x1, '\n', '\n')
print(x2, '\n', '\n')
print(x3)

[9 4 0 3 8 6] 
 

[[3 1 3 7]
 [4 0 2 3]
 [0 0 6 9]] 
 

[[[4 3 5 5 0]
  [8 3 5 2 2]
  [1 8 8 5 3]
  [0 0 8 5 8]]

 [[5 1 6 2 3]
  [1 2 5 6 2]
  [5 2 7 9 3]
  [5 6 0 2 0]]

 [[2 9 4 3 9]
  [9 2 2 4 0]
  [0 3 0 0 2]
  [3 2 7 4 7]]]


In [28]:
print('x3 ndim: ', x3.ndim)
print('x3 shape:', x3.shape)
print('x3 size: ', x3.size)
print('dtype:   ', x3.dtype)

x3 ndim:  3
x3 shape: (3, 4, 5)
x3 size:  60
dtype:    int64


### Array Indexing: Accessing Single Elements

In [29]:
x1

array([9, 4, 0, 3, 8, 6], dtype=int64)

In [30]:
x1[0]

9

In [31]:
x1[-1]

6

In [32]:
x1[-2]

8

In [33]:
x2

array([[3, 1, 3, 7],
       [4, 0, 2, 3],
       [0, 0, 6, 9]], dtype=int64)

In [34]:
x2[0, 0]

3

In [35]:
x2[2, 0]

0

In [36]:
x2[2, -1]

9

In [37]:
x2[0, 0] = 12
x2

array([[12,  1,  3,  7],
       [ 4,  0,  2,  3],
       [ 0,  0,  6,  9]], dtype=int64)

In [38]:
# x1 has an integer dtype, so the float is silently truncated to 3 on
# assignment -- no error or warning is raised.
x1[0] = 3.14159
x1

array([3, 4, 0, 3, 8, 6], dtype=int64)

### Array Slicing: Accessing Subarrays — One-Dimensional Subarrays

In [39]:
x1

array([3, 4, 0, 3, 8, 6], dtype=int64)

In [40]:
x1[:3]

array([3, 4, 0], dtype=int64)

In [41]:
x1[3:]

array([3, 8, 6], dtype=int64)

In [42]:
x1[1:4]

array([4, 0, 3], dtype=int64)

In [43]:
x1[1::2]

array([4, 3, 6], dtype=int64)

In [44]:
x1[::-1]

array([6, 8, 3, 0, 4, 3], dtype=int64)

In [45]:
x1[4::-2]

array([8, 0, 3], dtype=int64)

### Multidimensional Subarrays

In [46]:
x2

array([[12,  1,  3,  7],
       [ 4,  0,  2,  3],
       [ 0,  0,  6,  9]], dtype=int64)

In [47]:
x2[:2, :3]

array([[12,  1,  3],
       [ 4,  0,  2]], dtype=int64)

In [48]:
x2[:3, ::2]

array([[12,  3],
       [ 4,  2],
       [ 0,  6]], dtype=int64)

In [49]:
x2[::-1, ::-1]

array([[ 9,  6,  0,  0],
       [ 3,  2,  0,  4],
       [ 7,  3,  1, 12]], dtype=int64)

In [50]:
x2[:, 0]

array([12,  4,  0], dtype=int64)

In [51]:
x2[0, :]

array([12,  1,  3,  7], dtype=int64)

In [52]:
x2[0]

array([12,  1,  3,  7], dtype=int64)

### Subarrays as No-Copy Views

In [53]:
print(x2)

[[12  1  3  7]
 [ 4  0  2  3]
 [ 0  0  6  9]]


In [54]:
x2_sub = x2[:2, :2]
print(x2_sub)

[[12  1]
 [ 4  0]]


In [55]:
# x2_sub was created by slicing x2, so it is a view rather than a copy:
# this write also changes x2[0, 0].
x2_sub[0, 0] = 99
print(x2_sub)

[[99  1]
 [ 4  0]]


In [56]:
print(x2)

[[99  1  3  7]
 [ 4  0  2  3]
 [ 0  0  6  9]]


### Creating Copies of Arrays

In [57]:
x2_sub_copy = x2[:2, :2].copy()
print(x2_sub_copy)

[[99  1]
 [ 4  0]]


In [58]:
x2_sub_copy[0, 0] = 42
print(x2_sub_copy)

[[42  1]
 [ 4  0]]


In [59]:
print(x2)

[[99  1  3  7]
 [ 4  0  2  3]
 [ 0  0  6  9]]


### Reshaping of Arrays

In [60]:
grid = np.arange(1, 10).reshape(3, 3)
print(grid)

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [61]:
grid_test_1 = np.arange(30, 45).reshape(5, 3)
print(grid_test_1)

[[30 31 32]
 [33 34 35]
 [36 37 38]
 [39 40 41]
 [42 43 44]]


In [62]:
x = np.array([1, 2, 3])
x.reshape((1, 3))

array([[1, 2, 3]])

In [63]:
x.reshape((3, 1))

array([[1],
       [2],
       [3]])

In [64]:
x[np.newaxis, :]

array([[1, 2, 3]])

In [65]:
x[:, np.newaxis]

array([[1],
       [2],
       [3]])

### Concatenation of Arrays

In [66]:
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

In [67]:
z = np.array([99, 99, 99])
print(np.concatenate([x, y, z]))

[ 1  2  3  3  2  1 99 99 99]


In [68]:
grid = np.array([[1, 2, 3],
                 [4, 5, 6]])
np.concatenate([grid, grid])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [69]:
np.concatenate([grid, grid], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

In [70]:
np.vstack([x, grid])

array([[1, 2, 3],
       [1, 2, 3],
       [4, 5, 6]])

In [71]:
x

array([1, 2, 3])

In [72]:
grid

array([[1, 2, 3],
       [4, 5, 6]])

In [73]:
np.vstack([x, grid])

array([[1, 2, 3],
       [1, 2, 3],
       [4, 5, 6]])

In [74]:
y = np.array([[99],
              [99]])
np.hstack([grid, y])

array([[ 1,  2,  3, 99],
       [ 4,  5,  6, 99]])

### Splitting of Arrays

In [75]:
# Split points 3 and 5 cut the sequence into three pieces:
# x[:3], x[3:5] and x[5:].
# NOTE(review): this rebinds x1, x2 and x3, which held the random arrays
# created earlier in the notebook.
x = [1, 2, 3, 99, 99, 3, 2, 1]
x1, x2, x3 = np.split(x, indices_or_sections=[3, 5])
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


In [76]:
grid = np.arange(16).reshape((4, 4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [78]:
# Split the rows at index 2: rows before it go to `upper`, the rest to `lower`.
upper, lower = np.vsplit(grid, [2])
# A single print with a newline separator yields the same two blank lines
# between the halves as printing '\n' on its own in between.
print(upper, '\n', lower, sep='\n')

[[0 1 2 3]
 [4 5 6 7]]


[[ 8  9 10 11]
 [12 13 14 15]]


In [79]:
# Split the columns at index 2: the first two columns go to `left`,
# the remaining ones to `right`.
left, right = np.hsplit(grid, [2])
# A single print with a newline separator yields the same two blank lines
# between the halves as printing '\n' on its own in between.
print(left, '\n', right, sep='\n')

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]


[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


### The Slowness of Loops

In [80]:
import numpy as np
rng = np.random.default_rng(seed=1701)

def compute_reciprocals(values):
    """Return the reciprocal of each element of ``values`` as a float array.

    The work is done one element at a time in an explicit Python loop; this
    is the deliberately slow baseline that the vectorized ``1 / values``
    expression is compared against.

    Parameters
    ----------
    values : sequence supporting ``len()`` and integer indexing
        The numbers to invert.

    Returns
    -------
    numpy.ndarray
        Array of ``len(values)`` entries where entry ``i`` is
        ``1.0 / values[i]``.
    """
    size = len(values)
    # np.empty leaves the buffer uninitialized; every slot is filled below.
    reciprocals = np.empty(size)
    for position in range(size):
        reciprocals[position] = 1.0 / values[position]
    return reciprocals

values = rng.integers(1, 10, size=5)
compute_reciprocals(values)

array([0.11111111, 0.25      , 1.        , 0.33333333, 0.125     ])

In [82]:
big_array = rng.integers(1, 100, size = 1_000_000)
%timeit compute_reciprocals(big_array)

2.53 s ± 77.4 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


### Introducing Ufuncs

In [83]:
print(compute_reciprocals(values))
print(1 / values)

[0.11111111 0.25       1.         0.33333333 0.125     ]
[0.11111111 0.25       1.         0.33333333 0.125     ]


In [84]:
%timeit (1 / big_array)

3.55 ms ± 78.6 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [89]:
np.arange(5) / np.arange(1, 6)

array([0.        , 0.5       , 0.66666667, 0.75      , 0.8       ])

In [99]:
x = np.arange(9).reshape((3, 3))
2 ** x

array([[  1,   2,   4],
       [  8,  16,  32],
       [ 64, 128, 256]], dtype=int32)

In [96]:
np.arange(9).reshape((3, 3))

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

### Array Arithmetic

In [100]:
x = np.arange(4)
print('x      =', x)
print('x + 5  =', x + 5)
print('x - 5  =', x - 5)
print('x * 2  =', x * 2)
print('x / 2  =', x / 2)
print('x // 2 =', x // 2)

x      = [0 1 2 3]
x + 5  = [5 6 7 8]
x - 5  = [-5 -4 -3 -2]
x * 2  = [0 2 4 6]
x / 2  = [0.  0.5 1.  1.5]
x // 2 = [0 0 1 1]
