In [2]:
import numpy as np

# Basics

## Array Attributes

In [3]:
# Seeded generator: every fresh run of the notebook draws the same arrays.
rng = np.random.default_rng(seed=1701)

# Integers drawn from [0, 10). The three draws share one generator, so their
# order determines the values each array receives.
x1 = rng.integers(0, 10, size=6)          # 1-D: 6 elements
x2 = rng.integers(0, 10, size=(3, 4))     # 2-D: 3 rows x 4 columns
x3 = rng.integers(0, 10, size=(3, 4, 5))  # 3-D: 3 x 4 x 5

In [4]:
 # Report the basic attributes of the three-dimensional array x3.
 print("x3 ndim: ", x3.ndim)   # number of dimensions
 print("x3 shape:", x3.shape)  # size of each dimension
 print("x3 size: ", x3.size)   # total number of elements
 print("dtype: ", x3.dtype)    # data type of the elements

x3 ndim:  3
x3 shape: (3, 4, 5)
x3 size:  60
dtype:  int64


## Array Indexing

In [5]:
x1

array([9, 4, 0, 3, 8, 6], dtype=int64)

In [6]:
# Non-negative indices count from the start of the array; negative indices
# count back from the end, so x1[-2] is the second-to-last element.
x1[2], x1[-2]

(0, 8)

In [7]:
x2

array([[3, 1, 3, 7],
       [4, 0, 2, 3],
       [0, 0, 6, 9]], dtype=int64)

In [8]:
x2[1, 2]

2

Keep in mind that, unlike Python lists, NumPy arrays have a fixed type. This means,
for example, that if you attempt to insert a floating-point value into an integer array,
the value will be silently truncated. Don’t be caught unaware by this behavior!

In [9]:
# Storing a float in an integer array keeps only the integer part:
# x1[0] becomes 3, and no error is raised.
x1[0] = 3.14159
x1

array([3, 4, 0, 3, 8, 6], dtype=int64)

### Fancy Indexing

In [10]:
# Fancy indexing: pass a list (or array) of indices to select several
# elements at once.
ind = [2, 3, 4]
x1[ind]

array([0, 3, 8], dtype=int64)

In [11]:
# The result takes the shape of the index array (2 x 2 here),
# not the shape of the array being indexed (x1 is one-dimensional).

ind = np.array([[3, 2], [4, 1]])
x1[ind]

array([[3, 0],
       [8, 4]], dtype=int64)

## Array Slicing

### For 1D Array

In [12]:
x1

array([3, 4, 0, 3, 8, 6], dtype=int64)

In [13]:
x1[:3] # first three elements

array([3, 4, 0], dtype=int64)

In [14]:
x1[3:] # elements from index 3

array([3, 8, 6], dtype=int64)

In [15]:
x1[1:4] # middle subarray

array([4, 0, 3], dtype=int64)

In [16]:
x1[::2] # every second element

array([3, 0, 8], dtype=int64)

In [17]:
x1[1::2] # every second element, starting at index 1

array([4, 3, 6], dtype=int64)

In [18]:
x1[::-1] # all elements, reversed

array([6, 8, 3, 0, 4, 3], dtype=int64)

In [19]:
x1[4::-2] # every second element from index 4, reversed

array([8, 0, 3], dtype=int64)

### For 2D Array

In [20]:
x2

array([[3, 1, 3, 7],
       [4, 0, 2, 3],
       [0, 0, 6, 9]], dtype=int64)

In [21]:
x2[:2, :3] # first two rows & three columns

array([[3, 1, 3],
       [4, 0, 2]], dtype=int64)

In [22]:
x2[:3, ::2] # three rows, every second column

array([[3, 3],
       [4, 2],
       [0, 6]], dtype=int64)

In [23]:
x2[::-1, ::-1] # all rows & columns, reversed

array([[9, 6, 0, 0],
       [3, 2, 0, 4],
       [7, 3, 1, 3]], dtype=int64)

One commonly needed routine is accessing single rows or columns of an array. This
can be done by combining indexing and slicing, using an empty slice marked by a
single colon (:):

In [24]:
x2[:, 0] # first column of x2

array([3, 4, 0], dtype=int64)

In [25]:
x2[0, :] # first row of x2

array([3, 1, 3, 7], dtype=int64)

In the case of row access, the empty slice can be omitted for a more compact syntax:

In [26]:
x2[0] # equivalent to x2[0, :]

array([3, 1, 3, 7], dtype=int64)

### Slices are Views

In [27]:
x2

array([[3, 1, 3, 7],
       [4, 0, 2, 3],
       [0, 0, 6, 9]], dtype=int64)

In [28]:
# Take the upper-left 2 x 2 block of x2. Basic slicing returns a view onto
# x2's data rather than a copy.
x2_sub = x2[:2, :2]
x2_sub

array([[3, 1],
       [4, 0]], dtype=int64)

In [29]:
# Writing through the slice also changes x2, because x2_sub shares its data.
x2_sub[0, 0] = 99
x2_sub

array([[99,  1],
       [ 4,  0]], dtype=int64)

In [30]:
x2 # original is changed

array([[99,  1,  3,  7],
       [ 4,  0,  2,  3],
       [ 0,  0,  6,  9]], dtype=int64)

### What if you need a Copy?

In [31]:
# .copy() allocates an independent array, so later edits to x2_sub_copy
# leave x2 untouched.
x2_sub_copy = x2[:2, :2].copy()
x2_sub_copy

array([[99,  1],
       [ 4,  0]], dtype=int64)

## Array Reshaping

In [32]:
grid = np.arange(1, 10)
grid

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [33]:
grid = grid.reshape((3, 3))
grid

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

A common reshaping operation is converting a one-dimensional array into a two-dimensional row or column matrix:

In [34]:
x = np.array([1, 2, 3])
x

array([1, 2, 3])

In [35]:
x.reshape((1, 3)) # row vector via reshape

array([[1, 2, 3]])

In [36]:
x.reshape((3, 1)) # column vector via reshape

array([[1],
       [2],
       [3]])

A convenient shorthand for this is to use np.newaxis in the slicing syntax:

In [37]:
x[np.newaxis, :] # row vector via newaxis

array([[1, 2, 3]])

In [38]:
x[:, np.newaxis] # column vector via newaxis

array([[1],
       [2],
       [3]])

## Array Concatenation

### Concatenate

In [39]:
x = np.array([1, 2, 3])
y = np.array([3, 2, 1])
print(x, y)

[1 2 3] [3 2 1]


In [40]:
np.concatenate([x, y])

array([1, 2, 3, 3, 2, 1])

In [41]:
grid = np.array([[1, 2, 3], [4, 5, 6]])

In [42]:
# concatenate along the first axis
np.concatenate([grid, grid])

array([[1, 2, 3],
       [4, 5, 6],
       [1, 2, 3],
       [4, 5, 6]])

In [43]:
# Concatenate along the second axis (axis=1; axes are zero-indexed), placing
# the arrays side by side.
# `axis` selects the axis along which the arrays are joined: the default is 0,
# and axis=None flattens the arrays before joining them.
np.concatenate([grid, grid], axis=1)

array([[1, 2, 3, 1, 2, 3],
       [4, 5, 6, 4, 5, 6]])

### Stacking

For working with arrays of mixed dimensions, it can be clearer to use the np.vstack
(vertical stack) and np.hstack (horizontal stack) functions:

In [44]:
# Vertically stack the one-dimensional x (length 3) on top of the 2 x 3 grid
np.vstack([x, grid])

array([[1, 2, 3],
       [1, 2, 3],
       [4, 5, 6]])

In [45]:
# Horizontally stack a 2 x 1 column onto the right-hand edge of grid.
# Note: this rebinds y, which until now held a one-dimensional array.
y = np.array([[99], [99]])
np.hstack([grid, y])

array([[ 1,  2,  3, 99],
       [ 4,  5,  6, 99]])

## Array Splitting

In [46]:
# np.split cuts at the given indices: [3, 5] yields x[:3], x[3:5] and x[5:].
# Note: this rebinds x1, x2 and x3, replacing the random arrays created at
# the top of the notebook.
x = [1, 2, 3, 99, 99, 3, 2, 1]
split_points = [3, 5]
x1, x2, x3 = np.split(x, split_points)
print(x1, x2, x3)

[1 2 3] [99 99] [3 2 1]


In [47]:
grid = np.arange(16).reshape((4, 4))
grid

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [48]:
# Split the rows at index 2: rows 0-1 go to `upper`, rows 2-3 to `lower`.
upper, lower = np.vsplit(grid, [2])
print(upper)
print(lower)

[[0 1 2 3]
 [4 5 6 7]]
[[ 8  9 10 11]
 [12 13 14 15]]


In [49]:
# Split the columns at index 2: columns 0-1 go to `left`, columns 2-3 to `right`.
left, right = np.hsplit(grid, [2])
print(left)
print(right)

[[ 0  1]
 [ 4  5]
 [ 8  9]
 [12 13]]
[[ 2  3]
 [ 6  7]
 [10 11]
 [14 15]]


## Masks

In [50]:
x = np.array([1, 2, 3, 4, 5])
x < 3

array([ True,  True, False, False, False])

In [51]:
# Two equivalent ways to count the True entries of a boolean mask.
# A cell displays only its last expression, so the count_nonzero result is
# checked against np.sum here instead of being computed and thrown away.
assert np.count_nonzero(x < 6) == np.sum(x < 6)
np.sum(x < 6)  # an axis can be specified for multi-dimensional arrays

5

In [52]:
np.any(x > 8) # axis can be specified for multi-dimensional arrays

False

In [53]:
np.all(x < 10) # axis can be specified for multi-dimensional arrays

True

### Masking

In [54]:
x2 = rng.integers(10, size=(3, 4))
x2

array([[9, 3, 6, 6],
       [5, 0, 2, 5],
       [6, 4, 8, 2]], dtype=int64)

In [55]:
# Boolean mask over the one-dimensional x = [1, 2, 3, 4, 5] from the Masks section.
# NOTE(review): the x2 created in the previous cell is not used here — confirm
# whether the mask was meant to be applied to x2 instead.
x < 5

array([ True,  True,  True,  True, False])

In [56]:
# Indexing with a boolean mask returns a one-dimensional array holding the
# elements at the positions where the mask is True.
x[x < 5]

array([1, 2, 3, 4])

# Universal Functions

## Aggregations

The `reduce` method of a ufunc repeatedly applies that operation to the elements
of an array until only a single result remains.

### Reduce

In [57]:
x = np.arange(1, 6)
# add.reduce applies addition across the elements, returning their sum
np.add.reduce(x)

15

In [58]:
# multiply.reduce applies multiplication across the elements, returning their product
np.multiply.reduce(x)

120

### Accumulate

In [59]:
# to store all the intermediate results use accumulate instead
np.add.accumulate(x)

array([ 1,  3,  6, 10, 15])

# Structured Array

Structured arrays provide efficient storage for compound, heterogeneous data.

In [60]:
# simple unstructured array as seen before
x = np.zeros(4, dtype=int) 
x

array([0, 0, 0, 0])

In [61]:
# Compound dtype written as (field name, format) pairs:
# 'U10' = Unicode string of at most 10 characters, 'i4' = 4-byte integer,
# 'f8' = 8-byte float.
person_dtype = [('name', 'U10'), ('age', 'i4'), ('weight', 'f8')]
data = np.zeros(4, dtype=person_dtype)
data

array([('', 0, 0.), ('', 0, 0.), ('', 0, 0.), ('', 0, 0.)],
      dtype=[('name', '<U10'), ('age', '<i4'), ('weight', '<f8')])

In [62]:
name = ['Alice', 'Bob', 'Cathy', 'Doug']
age = [25, 45, 37, 19]
weight = [55.0, 85.5, 68.0, 61.5]

In [63]:
# Fill each field of the structured array from the matching list;
# indexing by field name addresses that field across all four records.
data['name'] = name
data['age'] = age
data['weight'] = weight
print(data)

[('Alice', 25, 55. ) ('Bob', 45, 85.5) ('Cathy', 37, 68. )
 ('Doug', 19, 61.5)]


## Indexing

In [64]:
# Get all names
data['name']

array(['Alice', 'Bob', 'Cathy', 'Doug'], dtype='<U10')

In [65]:
# Get the first row (record) of data
data[0]

('Alice', 25, 55.)

In [66]:
# Get the name from the last row
data[-1]['name']

'Doug'

In [67]:
# Get names where age is under 30
data[data['age'] < 30]['name']

array(['Alice', 'Doug'], dtype='<U10')