In [1]:
import numpy as np

NumPy is useful to us mainly as a way to efficiently do math on collections of data. The core tool we will use is `np.array`, a function that builds NumPy's array type (`ndarray`) from ordinary Python sequences.

In [2]:
# A one-dimensional array, built from a flat Python list.
list_of_nums = np.array([1, 2, 3, 4, 5])

# A two-dimensional (3 x 3) array, built from a list of equal-length rows.
matrix_of_nums = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

In [5]:
list_of_nums

array([1, 2, 3, 4, 5])

We can perform mathematical operations over an entire array.

In [11]:
list_of_nums + 4

array([5, 6, 7, 8, 9])

In [12]:
matrix_of_nums ** 2

array([[ 1,  4,  9],
       [16, 25, 36],
       [49, 64, 81]])

We can even perform operations with multiple arrays.

In [13]:
matrix_of_nums + matrix_of_nums

array([[ 2,  4,  6],
       [ 8, 10, 12],
       [14, 16, 18]])

In [14]:
list_of_nums + list_of_nums

array([ 2,  4,  6,  8, 10])

In [15]:
matrix_of_nums

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [16]:
# What is happening here?
# Broadcasting: the right-hand operand has shape (3, 1), so its single column
# is repeated across all three columns of the matrix. Row 0 gets +1, row 1
# gets +2 and row 2 gets +3.
matrix_of_nums + np.array([[1], 
                           [2], 
                           [3]])

array([[ 2,  3,  4],
       [ 6,  7,  8],
       [10, 11, 12]])

In [17]:
# What is happening here?
# Broadcasting compares shapes from the last dimension backwards. (3, 3) and
# (3, 2) disagree in the last dimension (3 vs 2) and neither side is 1, so
# NumPy refuses to combine them. The error is the point of this cell, so it is
# caught and printed; an uncaught exception would stop "Restart Kernel ->
# Run All" at this cell.
try:
    matrix_of_nums + np.array([[1, 2],
                               [2, 3],
                               [3, 4]])
except ValueError as err:
    print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (3,3) (3,2) 

In [32]:
# What is happening here?
# Broadcasting again: the 1-D array has shape (3,), so it is lined up with the
# columns and added to every row. Because float('inf') is a float, the result
# is a float array, and anything plus inf is inf (the last column).
matrix_of_nums + np.array([1, 2, float('inf')])

array([[  2.,   4.,  inf],
       [  5.,   7.,  inf],
       [  8.,  10.,  inf]])

What happens when we try to add things of different dimensions?

In [19]:
# Shapes (3, 3) and (5,) cannot be broadcast: the trailing dimensions are 3 and
# 5, and neither of them is 1. The error is the point of this cell, so it is
# caught and printed, which lets "Restart Kernel -> Run All" carry on.
try:
    matrix_of_nums + list_of_nums
except ValueError as err:
    print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (3,3) (5,) 

We can get information about the array.

In [20]:
# Summary attributes of the array, one per line: total element count, number
# of dimensions, length along each dimension, and element type.
print(
    matrix_of_nums.size,
    matrix_of_nums.ndim,
    matrix_of_nums.shape,
    matrix_of_nums.dtype,
    sep="\n",
)

9
2
(3, 3)
int64


In [21]:
# Specifying type
# Passing dtype= fixes the element type explicitly instead of leaving NumPy to
# infer it from the values (the inferred integer width can differ between
# platforms and NumPy versions).
np.array([1, 2, 3], dtype=np.int64).dtype

dtype('int64')

In [28]:
# Every element of an array shares one dtype, so mixing ints with a string
# makes NumPy convert everything to strings (note the quotes around 1 and 2).
np.array([1, 2, "asd"])

array(['1', '2', 'asd'], 
      dtype='<U21')

# Creating different arrays

Often, we want to create an array of random numbers, or an array filled with a single number, or maybe we don't yet know what the values will be but we do know the size of the array.

In [29]:
np.zeros((4, 4))

array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

In [31]:
# The shape tuple can have any number of dimensions. This one is 4-D: 3 blocks,
# each holding 2 matrices of 3 x 3 ones (floats, as the trailing dots show).
np.ones((3, 2, 3, 3))

array([[[[ 1.,  1.,  1.],
         [ 1.,  1.,  1.],
         [ 1.,  1.,  1.]],

        [[ 1.,  1.,  1.],
         [ 1.,  1.,  1.],
         [ 1.,  1.,  1.]]],


       [[[ 1.,  1.,  1.],
         [ 1.,  1.,  1.],
         [ 1.,  1.,  1.]],

        [[ 1.,  1.,  1.],
         [ 1.,  1.,  1.],
         [ 1.,  1.,  1.]]],


       [[[ 1.,  1.,  1.],
         [ 1.,  1.,  1.],
         [ 1.,  1.,  1.]],

        [[ 1.,  1.,  1.],
         [ 1.,  1.,  1.],
         [ 1.,  1.,  1.]]]])

In [33]:
np.random.random((2, 5))

array([[ 0.09564738,  0.97044326,  0.6234848 ,  0.95440092,  0.68686566],
       [ 0.82797608,  0.7185471 ,  0.44867959,  0.12169156,  0.64678417]])

In [34]:
# 100 evenly spaced values starting at 0 and ending at 1.
np.linspace(0, 1, 100)

array([ 0.        ,  0.01010101,  0.02020202,  0.03030303,  0.04040404,
        0.05050505,  0.06060606,  0.07070707,  0.08080808,  0.09090909,
        0.1010101 ,  0.11111111,  0.12121212,  0.13131313,  0.14141414,
        0.15151515,  0.16161616,  0.17171717,  0.18181818,  0.19191919,
        0.2020202 ,  0.21212121,  0.22222222,  0.23232323,  0.24242424,
        0.25252525,  0.26262626,  0.27272727,  0.28282828,  0.29292929,
        0.3030303 ,  0.31313131,  0.32323232,  0.33333333,  0.34343434,
        0.35353535,  0.36363636,  0.37373737,  0.38383838,  0.39393939,
        0.4040404 ,  0.41414141,  0.42424242,  0.43434343,  0.44444444,
        0.45454545,  0.46464646,  0.47474747,  0.48484848,  0.49494949,
        0.50505051,  0.51515152,  0.52525253,  0.53535354,  0.54545455,
        0.55555556,  0.56565657,  0.57575758,  0.58585859,  0.5959596 ,
        0.60606061,  0.61616162,  0.62626263,  0.63636364,  0.64646465,
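        0.65656566,  0.66666667,  0.67676768,  0.68686869,  0.6969697 ,
        0.70707071,  0.71717172,  0.72727273,  0.73737374,  0.74747475,
        0.75757576,  0.76767677,  0.77777778,  0.78787879,  0.7979798 ,
        0.80808081,  0.81818182,  0.82828283,  0.83838384,  0.84848485,
        0.85858586,  0.86868687,  0.87878788,  0.88888889,  0.8989899 ,
        0.90909091,  0.91919192,  0.92929293,  0.93939394,  0.94949495,
        0.95959596,  0.96969697,  0.97979798,  0.98989899,  1.        ])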

In [38]:
# Like range(): start at 10, step by 3, and stop before reaching 100
# (the last value produced is 97).
np.arange(10, 100, 3)

array([10, 13, 16, 19, 22, 25, 28, 31, 34, 37, 40, 43, 46, 49, 52, 55, 58,
       61, 64, 67, 70, 73, 76, 79, 82, 85, 88, 91, 94, 97])

What if I want a range, but in a different shape?

In [39]:
my_range = np.arange(20)
my_range

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [49]:
# reshape keeps the same 20 values in the same order and lays them out as
# 4 rows of 5 columns, filling one row at a time.
my_range = my_range.reshape((4, 5))
my_range

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

Can I rotate this? (More precisely, can I transpose it, so that rows become columns?)

In [50]:
# Swapping axis 0 with axis 1 turns each row into a column, so the (4, 5)
# array becomes (5, 4).
my_range.swapaxes(0, 1)

array([[ 0,  5, 10, 15],
       [ 1,  6, 11, 16],
       [ 2,  7, 12, 17],
       [ 3,  8, 13, 18],
       [ 4,  9, 14, 19]])

So far, this is _awesome_.

# Calculations

Let's get a random array of integers.

In [51]:
# Heads-up: np.random.randint differs from the standard library's
# random.randint -- the upper bound here is exclusive (values are 1..99), and
# the third argument is the shape of the array to fill.
nums = np.random.randint(low=1, high=100, size=(4, 5))
nums

array([[44, 59, 15, 88, 20],
       [72, 71, 12, 94, 31],
       [93, 64, 29, 24, 36],
       [72, 10, 49, 35, 28]])

What's the max, min, mean, and sum of these numbers?

In [52]:
# Whole-array reductions: with no axis given, every element takes part and
# each call returns a single number.
print("Max:", np.max(nums))
print("Min:", np.min(nums))
print("Avg:", np.mean(nums))
print("Sum:", np.sum(nums))

Max: 94
Min: 10
Avg: 47.3
Sum: 946


That's great, but what if I want to know this for each row?

In [53]:
# axis=1 runs each reduction along a row, giving one result per row
# (4 values for this 4 x 5 array).
print("Max:", np.max(nums, axis=1))
print("Min:", np.min(nums, axis=1))
print("Avg:", np.mean(nums, axis=1))
print("Sum:", np.sum(nums, axis=1))

Max: [88 94 93 72]
Min: [15 12 24 10]
Avg: [ 45.2  56.   49.2  38.8]
Sum: [226 280 246 194]


And column?

In [54]:
# axis=0 runs each reduction down a column, giving one result per column
# (5 values for this 4 x 5 array).
print("Max:", np.max(nums, axis=0))
print("Min:", np.min(nums, axis=0))
print("Avg:", np.mean(nums, axis=0))
print("Sum:", np.sum(nums, axis=0))

Max: [93 71 49 94 36]
Min: [44 10 12 24 20]
Avg: [ 70.25  51.    26.25  60.25  28.75]
Sum: [281 204 105 241 115]


Note that `axis=0` runs *down* the rows, so it produces one result per column, while `axis=1` runs *across* the columns and produces one result per row. This is, to me, the most confusing thing in NumPy.

I can also get the cumulative sum along an axis.

In [55]:
nums

array([[44, 59, 15, 88, 20],
       [72, 71, 12, 94, 31],
       [93, 64, 29, 24, 36],
       [72, 10, 49, 35, 28]])

In [56]:
nums.cumsum(axis=0)

array([[ 44,  59,  15,  88,  20],
       [116, 130,  27, 182,  51],
       [209, 194,  56, 206,  87],
       [281, 204, 105, 241, 115]])

In [57]:
nums.cumsum(axis=1)

array([[ 44, 103, 118, 206, 226],
       [ 72, 143, 155, 249, 280],
       [ 93, 157, 186, 210, 246],
       [ 72,  82, 131, 166, 194]])

# Indexing

You can get elements out of your NumPy arrays much like you would with a list.

In [75]:
# A fresh 4 x 5 grid of random integers drawn from 1..99 to index into.
nums = np.random.randint(low=1, high=100, size=(4, 5))
nums

array([[90, 85, 45, 95, 36],
       [32, 63, 80, 60, 10],
       [55, 55, 82, 68, 72],
       [77, 37, 17, 86, 99]])

In [76]:
nums[0,0]

90

In [77]:
nums[2,3]

68

In [78]:
# WHOA -- both axes can be sliced at once: rows 0-1 and columns 1-2
# (as with lists, the stop index is excluded).
nums[0:2, 1:3]

array([[85, 45],
       [63, 80]])

Note that this is **not** a copy -- it is a view of the original array. This is very different from lists, where slicing creates a new list.

In [79]:
# Take a slice, change one element through it, and then look at the original.
nums_slice = nums[0:2, 1:3]
print("Before change", repr(nums_slice), sep="\n")
# nums_slice[0, 0] refers to the same element as nums[0, 1].
nums_slice[0,0] = 0
print("Changed", repr(nums_slice), sep="\n")
# The original shows the 0 as well, because the slice is a view, not a copy.
print("Original", repr(nums), sep="\n")

Before change
array([[85, 45],
       [63, 80]])
Changed
array([[ 0, 45],
       [63, 80]])
Original
array([[90,  0, 45, 95, 36],
       [32, 63, 80, 60, 10],
       [55, 55, 82, 68, 72],
       [77, 37, 17, 86, 99]])


In [80]:
nums

array([[90,  0, 45, 95, 36],
       [32, 63, 80, 60, 10],
       [55, 55, 82, 68, 72],
       [77, 37, 17, 86, 99]])

In [81]:
# A step of -1 on both axes reverses the row order and the column order,
# which is the same as turning the array by 180 degrees.
nums[::-1, ::-1]

array([[99, 86, 17, 37, 77],
       [72, 68, 82, 55, 55],
       [10, 60, 80, 63, 32],
       [36, 95, 45,  0, 90]])

In [82]:
# start:stop:step works on each axis: every second row (rows 0 and 2),
# columns 1 through 3.
nums[0:4:2, 1:4]

array([[ 0, 45, 95],
       [55, 82, 68]])

In [83]:
nums

array([[90,  0, 45, 95, 36],
       [32, 63, 80, 60, 10],
       [55, 55, 82, 68, 72],
       [77, 37, 17, 86, 99]])

In [85]:
# Reversed slices are views too: rotated[0, 0] is the same element as
# nums[-1, -1], so the assignment changes nums (99 becomes 11).
rotated = nums[::-1, ::-1]
rotated[0,0] = 11
nums

array([[90,  0, 45, 95, 36],
       [32, 63, 80, 60, 10],
       [55, 55, 82, 68, 72],
       [77, 37, 17, 86, 11]])

In [88]:
# .copy() gives rotated its own data, so this assignment leaves nums untouched.
rotated = nums[::-1, ::-1].copy()
rotated[0,0] = 9999
nums

array([[90,  0, 45, 95, 36],
       [32, 63, 80, 60, 10],
       [55, 55, 82, 68, 72],
       [77, 37, 17, 86, 11]])

# Histograms

Here's a cool function for summarizing data.

In [89]:
nums

array([[90,  0, 45, 95, 36],
       [32, 63, 80, 60, 10],
       [55, 55, 82, 68, 72],
       [77, 37, 17, 86, 11]])

In [91]:
# Returns a pair: the number of values falling in each bin, and the bin edges.
# bins=3 splits the range from the minimum (0) to the maximum (95) into three
# equal-width bins.
np.histogram(nums, bins=3)

(array([4, 8, 8]),
 array([  0.        ,  31.66666667,  63.33333333,  95.        ]))

In [92]:
speeds = np.random.randint(0, 100, (30, 60))

In [93]:
speeds

array([[96, 60, 81, ..., 62, 89, 21],
       [98, 15, 51, ..., 93, 16, 70],
       [90, 22, 87, ..., 53, 85, 27],
       ..., 
       [27, 53, 39, ..., 13, 68, 49],
       [33, 33, 89, ..., 22, 52, 99],
       [ 3, 27, 87, ...,  5, 49, 39]])

In [99]:
# mean(axis=0) gives one mean per column (60 of them for this 30 x 60 array);
# [1] then picks out the mean of column 1.
speeds.mean(axis=0)[1]

46.133333333333333

In [102]:
speeds.mean()

49.098333333333336

## List comprehensions (and the rest of Python) & NumPy

In [108]:
[x for x in np.random.randint(0, 10, (10,))]

[9, 2, 7, 5, 7, 5, 6, 3, 1, 6]

In [114]:
# Iterating over a 2-D array yields its rows, so this builds a list of ten
# length-2 arrays.
# NOTE(review): the output recorded for this cell is a single number, which
# this expression cannot produce -- the cell looks edited after it was last
# run; re-run it to refresh the output.
[x for x in np.random.randint(0, 10, (10,2))]

4.5

In [115]:
import statistics as st

In [116]:
st.mean(np.random.randint(0, 10, (10, )))

5.5

In [117]:
sorted(np.random.randint(0, 10, (10, )))

[3, 5, 6, 6, 6, 6, 7, 8, 8, 9]

In [118]:
a = np.random.randint(0, 10, (4,5))
a

array([[2, 1, 5, 9, 2],
       [7, 7, 5, 8, 4],
       [0, 3, 8, 0, 9],
       [5, 7, 9, 5, 8]])

In [119]:
# ndarray.sort sorts in place (it returns None, hence the separate `a` line).
# With axis=0 each column is sorted independently, so the values in a row no
# longer belong together.
a.sort(axis=0)
a

array([[0, 1, 5, 0, 2],
       [2, 3, 5, 5, 4],
       [5, 7, 8, 8, 8],
       [7, 7, 9, 9, 9]])

# References

* [NumPy](http://www.numpy.org/)
* [Official NumPy tutorial](https://numpy.org/doc/stable/user/quickstart.html)
* [NumPy Example List](http://wiki.scipy.org/Numpy_Example_List)
* [NumPy MedKit slides](http://mentat.za.net/numpy/numpy_advanced_slides/)