# Intro to NumPy (Numerical Python)
Reference:  Chapter 2 of "Python Data Science Handbook" by Jake VanderPlas

In [1]:
import numpy as np

To see detailed documentation...

`np?`

In [2]:
np?

## A single-dimensional array (vector)

In [3]:
# Create an integer array
np.array([1, 5, 7, 3, 2, 1])

array([1, 5, 7, 3, 2, 1])

In [4]:
# Or, use an existing Python list to create a numpy array:
a = [1, 5, 7, 3, 2, 1]
np.array(a)

array([1, 5, 7, 3, 2, 1])

In [5]:
# What if we have a mix of ints and floats?
# Each entry is "upcasted" (in this case, to floats)
np.array([1, 3.2, 5])

array([1. , 3.2, 5. ])

In [6]:
# Determine the datatype of the numpy array:
b = np.array([1, 3.2, 5])
b.dtype

dtype('float64')

In [7]:
# Our n-dimensional array (ndarray) has only 1 dimension...it's a vector:
b.ndim

1

In [8]:
# Since we only have one dimension, the shape will only show us the length of that dimension:
b.shape

(3,)

In [9]:
# It has 3 elements in it:
b.size

3

## Multidimensional arrays (matrices)

In [10]:
# We can create an ndarray with 2 rows and 3 columns:
n = np.array( [ [1, 2, 3], [4, 5, 6] ])
n

array([[1, 2, 3],
       [4, 5, 6]])

In [11]:
# We can access the 1st row, 2nd column (indices are 0-based, so that is index 0, then index 1)...
n[0][1]

2

In [12]:
# But numpy also allows this syntax:
n[0,1]

2

In [13]:
# We can grab all of the data in row 1:
n[0]

array([1, 2, 3])

In [14]:
n[0,:]   # equivalent to previous line

array([1, 2, 3])

In [15]:
n[0][:]   # equivalent to previous two lines

array([1, 2, 3])

In [16]:
# Create a 3x4 array initialized with 0's:
c = np.zeros((3,4))
c

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [17]:
# Note that it defaults to floats
c.dtype

dtype('float64')

In [18]:
# We can explicitly define the datatype to be integer:
d = np.zeros((3,4), dtype=np.int16)
d

array([[0, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]], dtype=int16)

In [19]:
d.dtype

dtype('int16')

In [20]:
# What if we try to insert a float?
d[0,0] = 3.8

In [21]:
d.dtype

dtype('int16')

In [22]:
# Where did 3.8 go?  Since d holds int16 values, the fractional part
# was dropped when we stored it (3.8 became 3; it was not rounded up to 4):
d

array([[3, 0, 0, 0],
       [0, 0, 0, 0],
       [0, 0, 0, 0]], dtype=int16)

In [23]:
# Create a 4x5 array of 1's:
np.ones((4,5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [24]:
# Use "arange":
np.arange(10, 20)

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [25]:
np.arange(10, 20, 2)                 # Start at 10, step by 2, stop before 20 (the end value is excluded)

array([10, 12, 14, 16, 18])

In [26]:
np.linspace( 0, 3, 9 )                 # 9 numbers from 0 to 3

array([0.   , 0.375, 0.75 , 1.125, 1.5  , 1.875, 2.25 , 2.625, 3.   ])

In [27]:
# Populate a 2x4 array with 9.72 as the value everywhere:
np.full((2,4), 9.72)

array([[9.72, 9.72, 9.72, 9.72],
       [9.72, 9.72, 9.72, 9.72]])

In [28]:
# Create a 3x3 array of uniformly distributed
# random values between 0 and 1
np.random.random((3,3))

array([[0.09859415, 0.42619156, 0.99293517],
       [0.94713508, 0.06483504, 0.1075727 ],
       [0.99754499, 0.44650097, 0.29326434]])

In [29]:
# Create a 3x3 array of normally distributed
# random values with mean 0 and std. dev. 1
np.random.normal(0, 1, (3,3))

array([[-0.44518363, -1.22674058,  0.86166523],
       [ 0.96132298, -0.25938158, -0.2612799 ],
       [ 0.28917722,  1.62419456, -0.8715957 ]])

In [30]:
# Create a 3x3 array of normally distributed
# random values with mean 10 and std. dev. 2
np.random.normal(10, 2, (3,3))

array([[ 9.24060436,  8.58838752, 13.61089954],
       [10.11407723,  9.29874865,  8.54399087],
       [11.78794932, 11.34864688, 12.93250814]])

In [31]:
# Create a 3x3 identity matrix
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [32]:
# Create a 3x3 array of random integers 
# in the interval [5,15)
np.random.randint(5, 15, (3,3))

array([[ 7, 11,  5],
       [ 5, 14, 14],
       [ 9, 14,  5]])

In [33]:
# Use a seed to reproduce random numbers
np.random.seed(0)

# 1-dimensional array with integer values in range [0,10)
np.random.randint(10, size=6)

array([5, 0, 3, 3, 7, 9])

In [34]:
# 2-dimensional array with integer values in range [0,10)
np.random.randint(10, size=(3, 4))

array([[3, 5, 2, 4],
       [7, 6, 8, 8],
       [1, 6, 7, 7]])

In [35]:
# 3-dimensional ...
np.random.randint(10, size=(3, 4, 5))

array([[[8, 1, 5, 9, 8],
        [9, 4, 3, 0, 3],
        [5, 0, 2, 3, 8],
        [1, 3, 3, 3, 7]],

       [[0, 1, 9, 9, 0],
        [4, 7, 3, 2, 7],
        [2, 0, 0, 4, 5],
        [5, 6, 8, 4, 1]],

       [[4, 9, 8, 1, 1],
        [7, 9, 9, 3, 6],
        [7, 2, 0, 3, 5],
        [9, 4, 4, 6, 4]]])

In [36]:
np.random.randint(10, size=(5, 4, 3))

array([[[4, 3, 4],
        [4, 8, 4],
        [3, 7, 5],
        [5, 0, 1]],

       [[5, 9, 3],
        [0, 5, 0],
        [1, 2, 4],
        [2, 0, 3]],

       [[2, 0, 7],
        [5, 9, 0],
        [2, 7, 2],
        [9, 2, 3]],

       [[3, 2, 3],
        [4, 1, 2],
        [9, 1, 4],
        [6, 8, 2]],

       [[3, 0, 0],
        [6, 0, 6],
        [3, 3, 8],
        [8, 8, 2]]])

## Accessing individual elements

In [37]:
a = np.arange(15)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [38]:
# Get the 3rd element:
a[2]

2

In [39]:
# Get the last element:
a[-1]

14

In [40]:
# Get the 2nd to last element:
a[-2]

13

In [41]:
b = np.random.randint(5, 15, (3,4))
b

array([[ 8,  7,  5, 13],
       [13,  8, 13,  7],
       [13,  9,  8,  5]])

In [42]:
b[2,3]

5

## Array Slices

In [43]:
a = np.arange(15)
a

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [44]:
# Get the first 3 elements of a:
a[:3]

array([0, 1, 2])

In [45]:
# Get the last 3 elements:
a[-3:]

array([12, 13, 14])

In [46]:
# Get the elements from index 3 forward:
a[3:]

array([ 3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [47]:
# Something in the middle:
a[2:5]

array([2, 3, 4])

In [48]:
# Every other element, starting with index 4:
a[4::2]

array([ 4,  6,  8, 10, 12, 14])

In [49]:
# Reverse the array:
a[::-1]

array([14, 13, 12, 11, 10,  9,  8,  7,  6,  5,  4,  3,  2,  1,  0])

In [50]:
# Get every other element, starting at index 10 and counting backwards by 2:
a[10::-2]

array([10,  8,  6,  4,  2,  0])

In [51]:
# Get every other element, starting at index 10, counting backwards by 2, and stopping before index 4:
a[10:4:-2]      # [start : end : decrement]

array([10,  8,  6])

In [52]:
a[4:10:2]

array([4, 6, 8])

## Multidimensional Slices

In [53]:
b = np.random.randint(5, 50, (4,7))
b

array([[ 9,  8, 16, 27, 18, 16, 21],
       [29, 34, 26, 30, 21, 24, 38],
       [45, 37, 41, 11, 26, 36, 18],
       [12, 29, 20, 46, 23, 45, 20]])

In [54]:
# Get everything in the 2nd column:
b[:, 1]

array([ 8, 34, 37, 29])

In [55]:
# Get everything in the 3rd row:
b[2, :]

array([45, 37, 41, 11, 26, 36, 18])

In [56]:
# Get the first 2 rows and the first 4 columns
b[:2, :4]

array([[ 9,  8, 16, 27],
       [29, 34, 26, 30]])

In [57]:
# Get every other row, starting with row 0.  Also get the first 4 columns
b[::2, :4]

array([[ 9,  8, 16, 27],
       [45, 37, 41, 11]])

In [58]:
# Get every other row, starting with row 1.  Also get the first 4 columns
b[1::2, :4]

array([[29, 34, 26, 30],
       [12, 29, 20, 46]])

In [59]:
# Reverse the columns (but keep the rows unchanged):
b[:, ::-1]

array([[21, 16, 18, 27, 16,  8,  9],
       [38, 24, 21, 30, 26, 34, 29],
       [18, 36, 26, 11, 41, 37, 45],
       [20, 45, 23, 46, 20, 29, 12]])

## Updating Array Slices

**Caution**:  A slice is a view of the original array's data, not a copy.  If you make changes to a slice, it also changes the original array (and vice versa).

- Why might this be useful?

In [60]:
b = np.random.randint(5, 50, (4,7))
b

array([[16, 43, 34,  6, 36, 49, 29],
       [29,  8, 23,  8, 47, 17, 43],
       [40, 27, 10, 28, 48, 37, 16],
       [45, 25, 15, 48, 42, 33, 45]])

In [61]:
# Let's look at the 2x2 array in the NW corner:
b_nw = b[:2, :2]
b_nw

array([[16, 43],
       [29,  8]])

In [62]:
# Suppose we change the 1st row, 1st column of the slice (index [0, 0]):
b_nw[0, 0] = 111
b_nw

array([[111,  43],
       [ 29,   8]])

In [63]:
# Look at the original array:
b

array([[111,  43,  34,   6,  36,  49,  29],
       [ 29,   8,  23,   8,  47,  17,  43],
       [ 40,  27,  10,  28,  48,  37,  16],
       [ 45,  25,  15,  48,  42,  33,  45]])

In [64]:
# Suppose we change the original array:
b[0,1] = -88
b_nw
# Note that this modified our sub-array

array([[111, -88],
       [ 29,   8]])

In [65]:
# Now, suppose we modify several elements of our sub-array at once:
b_nw[1,:] = 3
b_nw

array([[111, -88],
       [  3,   3]])

In [66]:
b

array([[111, -88,  34,   6,  36,  49,  29],
       [  3,   3,  23,   8,  47,  17,  43],
       [ 40,  27,  10,  28,  48,  37,  16],
       [ 45,  25,  15,  48,  42,  33,  45]])

## Copying Arrays

In [67]:
b_copy = b.copy()
b_copy

array([[111, -88,  34,   6,  36,  49,  29],
       [  3,   3,  23,   8,  47,  17,  43],
       [ 40,  27,  10,  28,  48,  37,  16],
       [ 45,  25,  15,  48,  42,  33,  45]])

In [68]:
b[0,0] = 999
b

array([[999, -88,  34,   6,  36,  49,  29],
       [  3,   3,  23,   8,  47,  17,  43],
       [ 40,  27,  10,  28,  48,  37,  16],
       [ 45,  25,  15,  48,  42,  33,  45]])

In [69]:
b_copy

array([[111, -88,  34,   6,  36,  49,  29],
       [  3,   3,  23,   8,  47,  17,  43],
       [ 40,  27,  10,  28,  48,  37,  16],
       [ 45,  25,  15,  48,  42,  33,  45]])

## Reshaping Arrays

In [70]:
c = np.arange(1,10)
c

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [71]:
# Explicitly provide the new shape:
c.reshape(3,3)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [72]:
# Provide the size of one dimension and let numpy determine the size of the other:
c.reshape(3,-1)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [73]:
# Convert the 1-dimensional array to a column vector (9 rows, 1 column);
# the -1 tells numpy to work out the number of rows for us:
c.reshape(-1, 1)

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

In [74]:
# Or, use "newaxis" to do the same thing:
d = c[:, np.newaxis]
d

array([[1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

In [75]:
d.shape

(9, 1)

## Concatenate Arrays

In [76]:
# Single-dimensional arrays:
x = np.array([1, 2, 3])
y = np.array([4, 5, 6])
z = np.array([7, 8, 9])
np.concatenate([x, z, y])

array([1, 2, 3, 7, 8, 9, 4, 5, 6])

In [77]:
# Multi-dimensional arrays:
X = np.ones((2, 3))
Y = np.full((3,3), 5)

In [78]:
X

array([[1., 1., 1.],
       [1., 1., 1.]])

In [79]:
Y

array([[5, 5, 5],
       [5, 5, 5],
       [5, 5, 5]])

In [80]:
# By default, this will concatenate on the "first (index 0)" axis (rows)
# NOTE:  In this case, X and Y must have the same number of columns
np.concatenate([X, Y])

array([[1., 1., 1.],
       [1., 1., 1.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

In [81]:
# We can concatenate on the "second (index 1)" axis (columns),
# but X (2 rows) and Y (3 rows) have differing numbers of rows, so numpy raises a ValueError.
# The error is the point of this cell, so we catch and display it; that way
# "Restart Kernel -> Run All" continues on to the cells below instead of halting here.
try:
    np.concatenate([X, Y], axis=1)
except ValueError as err:
    print(f"ValueError: {err}")

ValueError: ignored

In [83]:
Z = np.full((2,5), 7)
Z

array([[7, 7, 7, 7, 7],
       [7, 7, 7, 7, 7]])

In [84]:
np.concatenate([X,Z], axis=1)

array([[1., 1., 1., 7., 7., 7., 7., 7.],
       [1., 1., 1., 7., 7., 7., 7., 7.]])

### `hstack` and `vstack`

In [85]:
# Vertical stack
np.vstack([X, Y])

array([[1., 1., 1.],
       [1., 1., 1.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

In [86]:
# Horizontal stack
np.hstack([X, Z])

array([[1., 1., 1., 7., 7., 7., 7., 7.],
       [1., 1., 1., 7., 7., 7., 7., 7.]])

## Arithmetic

In [87]:
x = np.random.randint(-5,5, (1,5))
x

array([[-3, -2,  4,  2,  0]])

In [88]:
y = np.random.randint(10,20, (1,5))
y

array([[13, 14, 15, 13, 13]])

In [89]:
# Add 5 to every element of x:
x + 5

array([[2, 3, 9, 7, 5]])

In [90]:
# Add x and y:
x + y

array([[10, 12, 19, 15, 13]])

In [91]:
# Square every element of x:
x ** 2

array([[ 9,  4, 16,  4,  0]])

In [92]:
# Find the absolute values of each element of x:
np.abs(x)

array([[3, 2, 4, 2, 0]])

In [93]:
# Divide each element of x by 2:
x / 2

array([[-1.5, -1. ,  2. ,  1. ,  0. ]])

In [94]:
# Find the min value of x:
np.min(x)

-3

In [95]:
# Sum the values in x:
np.sum(x)

1

## Comparison Operators

In [96]:
x = np.arange(1,10)
x

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [97]:
x < 5

array([ True,  True,  True,  True, False, False, False, False, False])

In [98]:
x != 4

array([ True,  True,  True, False,  True,  True,  True,  True,  True])

In [99]:
x == 6

array([False, False, False, False, False,  True, False, False, False])

In [100]:
# How many elements are greater than 4?
np.sum(x > 4)

5

In [101]:
# Are there any values greater than 5?
np.any(x > 5)

True

In [102]:
# Are all values less than 8?
np.all(x < 8)

False

In [103]:
# What is the sum of the elements greater than 4?
np.sum(x[x > 4])

35

In [104]:
# What is the sum of the elements strictly between 2 and 6?
np.sum(x[(x > 2) & (x < 6)])          # The parentheses are required: & binds more tightly than > and <

12

In [105]:
# What are all of the elements strictly between 2 and 6?
x[(x > 2) & (x < 6)]          # The parentheses are required: & binds more tightly than > and <

array([3, 4, 5])

## Sorting arrays

In [106]:
x = np.array([9, 2, 1, 4, 7, 5])
x

array([9, 2, 1, 4, 7, 5])

In [107]:
np.sort(x)     # This doesn't actually change the ordering of x

array([1, 2, 4, 5, 7, 9])

In [108]:
x

array([9, 2, 1, 4, 7, 5])

In [109]:
# We can "sort-in-place":
x.sort()
x

array([1, 2, 4, 5, 7, 9])

In [110]:
# It's often beneficial to know the indices:
y = np.array([9, 2, 1, 4, 7, 5])
np.argsort(y)

array([2, 1, 3, 5, 4, 0])

In [111]:
# Sort in descending order:
y = np.array([9, 2, 1, 4, 7, 5])
y[::-1].sort()    # y[::-1] is a reversed view of y; sorting that view in place (ascending) writes through to y, leaving y in descending order
y

array([9, 7, 5, 4, 2, 1])

### Sorting along rows or columns

In [112]:
x = np.random.randint(2,12, (3,4))
x

array([[ 9, 11, 11, 11],
       [ 9,  5,  4,  5],
       [11,  9,  9,  7]])

In [113]:
# Sort each column:
np.sort(x, axis=0)

array([[ 9,  5,  4,  5],
       [ 9,  9,  9,  7],
       [11, 11, 11, 11]])

In [114]:
# Sort each row:
np.sort(x, axis=1)

array([[ 9, 11, 11, 11],
       [ 4,  5,  5,  9],
       [ 7,  9,  9, 11]])

In [115]:
# Sort an array by the nth column
Z = np.random.randint(0,10,(3,3))
print(Z)
print("\n")
print(Z[Z[:,1].argsort()])   # In this case, sort by the 2nd column

[[1 2 2]
 [8 1 5]
 [8 4 0]]


[[8 1 5]
 [1 2 2]
 [8 4 0]]
