In [1]:
import numpy as np

NumPy is useful to us mainly as a way to efficiently do math on collections of data. The core function we will use is `np.array`, which builds an `ndarray` -- NumPy's n-dimensional array class.

In [2]:
# A flat list becomes a 1-D array of five integers.
list_of_nums = np.array([1, 2, 3, 4, 5])

# A nested list becomes a 2-D array: one inner list per row (3 rows x 3 columns).
matrix_of_nums = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

In [3]:
list_of_nums

array([1, 2, 3, 4, 5])

In [4]:
matrix_of_nums

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

We can perform mathematical operations over an entire array.

In [5]:
# The scalar 4 is added to each of the five elements.
list_of_nums + 4

array([5, 6, 7, 8, 9])

In [6]:
# ** works elementwise: each entry is squared (this is not a matrix product).
matrix_of_nums ** 2

array([[ 1,  4,  9],
       [16, 25, 36],
       [49, 64, 81]])

We can even perform operations with multiple arrays.

In [9]:
# Arrays with the same shape are added entry by entry.
matrix_of_nums + matrix_of_nums

array([[ 2,  4,  6],
       [ 8, 10, 12],
       [14, 16, 18]])

In [10]:
# The same entry-by-entry rule applies to 1-D arrays of equal length.
list_of_nums + list_of_nums

array([ 2,  4,  6,  8, 10])

In [12]:
matrix_of_nums

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [11]:
# What is happening here?
# Broadcasting: the (3, 1) column is stretched across all three columns, so
# 1 is added to every element of the first row, 2 to the second and 3 to the third.
matrix_of_nums + np.array([[1], 
                           [2], 
                           [3]])

array([[ 2,  3,  4],
       [ 6,  7,  8],
       [10, 11, 12]])

In [13]:
# What is happening here?
# Shapes (3, 3) and (3, 2) are incompatible: broadcasting can only stretch an
# axis of length 1, and the trailing axes here have lengths 3 and 2.
# The ValueError is the point of this cell, so it is caught and printed
# instead of being left to abort a "Restart & Run All".
try:
    matrix_of_nums + np.array([[1, 2],
                               [2, 3],
                               [3, 4]])
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: operands could not be broadcast together with shapes (3,3) (3,2) 

In [15]:
# What is happening here?
# Broadcasting again: the 1-D array [1, 2, 3] is treated as a single row and
# added to every row, so 1 goes to column 0, 2 to column 1 and 3 to column 2.
matrix_of_nums + np.array([1, 2, 3])

array([[ 2,  4,  6],
       [ 5,  7,  9],
       [ 8, 10, 12]])

What happens when we try to add arrays whose shapes cannot be broadcast together?

In [14]:
# A (3, 3) matrix and a length-5 array cannot be broadcast together: the
# trailing axes have lengths 3 and 5, and neither of them is 1.
# The ValueError is the demonstration, so it is caught and printed instead of
# being left to abort a "Restart & Run All".
try:
    matrix_of_nums + list_of_nums
except ValueError as err:
    print(f"{type(err).__name__}: {err}")

ValueError: operands could not be broadcast together with shapes (3,3) (5,) 

We can get information about the array.

In [16]:
# One value per output line:
#   size  - total number of elements
#   ndim  - number of axes
#   shape - length of each axis
#   dtype - type of the stored elements
print(
    matrix_of_nums.size,
    matrix_of_nums.ndim,
    matrix_of_nums.shape,
    matrix_of_nums.dtype,
    sep="\n",
)

9
2
(3, 3)
int64


In [17]:
# Specifying type
# With no dtype argument, NumPy infers the element type from the values
# (a default integer type for these ints).
np.array([1, 2, 3]).dtype

dtype('int64')

In [18]:
# Passing dtype overrides the inferred type: the same integers are stored as floats.
np.array([1, 2, 3], dtype="float")

array([ 1.,  2.,  3.])

# Creating different arrays

Often we want an array filled with random numbers or with one repeated value -- or we only know the shape we need and will fill in the values later.

In [19]:
# The shape is given as a tuple: 4 rows by 4 columns, every entry 0.0.
np.zeros((4, 4))

array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

In [20]:
# A shape can have any number of axes: here 3 blocks, each 2 rows by 3 columns, every entry 1.0.
np.ones((3, 2, 3))

array([[[ 1.,  1.,  1.],
        [ 1.,  1.,  1.]],

       [[ 1.,  1.,  1.],
        [ 1.,  1.,  1.]],

       [[ 1.,  1.,  1.],
        [ 1.,  1.,  1.]]])

In [21]:
# 2 x 5 array of random floats drawn uniformly from [0, 1).
# No seed is set, so the values change on every run.
np.random.random((2, 5))

array([[ 0.84775668,  0.12183767,  0.38005891,  0.98637689,  0.19269976],
       [ 0.9990294 ,  0.7170366 ,  0.85814092,  0.2235928 ,  0.93631089]])

In [22]:
# 100 evenly spaced values from 0 to 1, with both endpoints included.
np.linspace(0, 1, 100)

array([ 0.        ,  0.01010101,  0.02020202,  0.03030303,  0.04040404,
        0.05050505,  0.06060606,  0.07070707,  0.08080808,  0.09090909,
        0.1010101 ,  0.11111111,  0.12121212,  0.13131313,  0.14141414,
        0.15151515,  0.16161616,  0.17171717,  0.18181818,  0.19191919,
        0.2020202 ,  0.21212121,  0.22222222,  0.23232323,  0.24242424,
        0.25252525,  0.26262626,  0.27272727,  0.28282828,  0.29292929,
        0.3030303 ,  0.31313131,  0.32323232,  0.33333333,  0.34343434,
        0.35353535,  0.36363636,  0.37373737,  0.38383838,  0.39393939,
        0.4040404 ,  0.41414141,  0.42424242,  0.43434343,  0.44444444,
        0.45454545,  0.46464646,  0.47474747,  0.48484848,  0.49494949,
        0.50505051,  0.51515152,  0.52525253,  0.53535354,  0.54545455,
        0.55555556,  0.56565657,  0.57575758,  0.58585859,  0.5959596 ,
        0.60606061,  0.61616162,  0.62626263,  0.63636364,  0.64646465,
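        0.65656566,  0.66666667,  0.67676768,  0.68686869,  0.6969697 ,
        0.70707071,  0.71717172,  0.72727273,  0.73737374,  0.74747475,
        0.75757576,  0.76767677,  0.77777778,  0.78787879,  0.7979798 ,
        0.80808081,  0.81818182,  0.82828283,  0.83838384,  0.84848485,
        0.85858586,  0.86868687,  0.87878788,  0.88888889,  0.8989899 ,
        0.90909091,  0.91919192,  0.92929293,  0.93939394,  0.94949495,
        0.95959596,  0.96969697,  0.97979798,  0.98989899,  1.        ])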

In [23]:
# Like range(): the start (10) is included and the stop (15) is excluded.
np.arange(10, 15)

array([10, 11, 12, 13, 14])

What if I want a range, but in a different shape?

In [27]:
# The integers 0 through 19 as a 1-D array; the stop value 20 is excluded.
my_range = np.arange(0, 20)
my_range

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [28]:
# Lay the 20 elements out as 4 rows of 5; values are filled in row by row.
my_range = np.reshape(my_range, (4, 5))
my_range

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

Can I flip its rows and columns (transpose it)?

In [29]:
# Swapping axes 0 and 1 of a 2-D array transposes it: the 4 x 5 array becomes
# 5 x 4, with rows turned into columns.
my_range.swapaxes(0, 1)

array([[ 0,  5, 10, 15],
       [ 1,  6, 11, 16],
       [ 2,  7, 12, 17],
       [ 3,  8, 13, 18],
       [ 4,  9, 14, 19]])

So far, this is _awesome_.

# Calculations

Let's get a random array of integers.

In [32]:
# Draw a 4 x 5 array of random integers.
# Gotcha: unlike the standard library's random.randint(a, b), which can return
# b, NumPy's upper bound is exclusive -- every value here lies in 1..99.
nums = np.random.randint(low=1, high=100, size=(4, 5))
nums

array([[77, 70, 20, 22, 85],
       [29,  8, 14, 30,  9],
       [54, 83,  8, 55, 25],
       [35, 92, 45, 87, 79]])

What's the max, min, mean, and sum of these numbers?

In [33]:
# With no axis given, each reduction runs over every element of the array.
print("Max:", np.max(nums))
print("Min:", np.min(nums))
print("Avg:", np.mean(nums))
print("Sum:", np.sum(nums))

Max: 92
Min: 8
Avg: 46.35
Sum: 927


That's great, but what if I want to know this for each row?

In [34]:
# axis=1 reduces along each row, giving one result per row (4 values).
print("Max:", np.max(nums, axis=1))
print("Min:", np.min(nums, axis=1))
print("Avg:", np.mean(nums, axis=1))
print("Sum:", np.sum(nums, axis=1))

Max: [85 30 83 92]
Min: [20  8  8 35]
Avg: [ 54.8  18.   45.   67.6]
Sum: [274  90 225 338]


And column?

In [35]:
# axis=0 reduces down each column, giving one result per column (5 values).
print("Max:", np.max(nums, axis=0))
print("Min:", np.min(nums, axis=0))
print("Avg:", np.mean(nums, axis=0))
print("Sum:", np.sum(nums, axis=0))

Max: [77 92 45 87 85]
Min: [29  8  8 22  9]
Avg: [ 48.75  63.25  21.75  48.5   49.5 ]
Sum: [195 253  87 194 198]


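Note that `axis=0` collapses the rows and so gives one result per column, while `axis=1` collapses the columns and gives one result per row. In other words, the axis you name is the one that disappears. This is, to me, the most confusing thing in NumPy.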

I can also get the cumulative sum along an axis.

In [36]:
nums

array([[77, 70, 20, 22, 85],
       [29,  8, 14, 30,  9],
       [54, 83,  8, 55, 25],
       [35, 92, 45, 87, 79]])

In [37]:
# Running total down each column; the last row holds the column sums.
nums.cumsum(axis=0)

array([[ 77,  70,  20,  22,  85],
       [106,  78,  34,  52,  94],
       [160, 161,  42, 107, 119],
       [195, 253,  87, 194, 198]])

In [38]:
# Running total across each row; the last column holds the row sums.
nums.cumsum(axis=1)

array([[ 77, 147, 167, 189, 274],
       [ 29,  37,  51,  81,  90],
       [ 54, 137, 145, 200, 225],
       [ 35, 127, 172, 259, 338]])

# Indexing

You can get elements out of your NumPy arrays much like you would with a list.

In [39]:
nums

array([[77, 70, 20, 22, 85],
       [29,  8, 14, 30,  9],
       [54, 83,  8, 55, 25],
       [35, 92, 45, 87, 79]])

In [40]:
# Row 0, column 0 (indices are zero-based).
nums[0,0]

77

In [41]:
# Row 2, column 3: the third row, fourth column.
nums[2,3]

55

In [42]:
# WHOA
# One slice per axis: rows 0-1 and columns 1-2 (the stop index is excluded).
nums[0:2, 1:3]

array([[70, 20],
       [ 8, 14]])

Note that this is **not** a copy -- it is a view of the original array. This is very different from lists, where slicing always produces a new list.

In [43]:
# nums_slice is a view onto rows 0-1, columns 1-2 of nums, not a copy.
nums_slice = nums[0:2, 1:3]
print("Before change", repr(nums_slice), sep="\n")
# Writing through the view also changes nums[0, 1] in the original array.
nums_slice[0,0] = 0
print("Changed", repr(nums_slice), sep="\n")
print("Original", repr(nums), sep="\n")

Before change
array([[70, 20],
       [ 8, 14]])
Changed
array([[ 0, 20],
       [ 8, 14]])
Original
array([[77,  0, 20, 22, 85],
       [29,  8, 14, 30,  9],
       [54, 83,  8, 55, 25],
       [35, 92, 45, 87, 79]])


In [44]:
nums

array([[77,  0, 20, 22, 85],
       [29,  8, 14, 30,  9],
       [54, 83,  8, 55, 25],
       [35, 92, 45, 87, 79]])

In [45]:
# A step of -1 on both axes reverses the row order and the column order.
nums[::-1, ::-1]

array([[79, 87, 45, 92, 35],
       [25, 55,  8, 83, 54],
       [ 9, 30, 14,  8, 29],
       [85, 22, 20,  0, 77]])

In [46]:
# start:stop:step on the rows picks rows 0 and 2; 1:4 picks columns 1 through 3.
nums[0:4:2, 1:4]

array([[ 0, 20, 22],
       [83,  8, 55]])

In [47]:
nums

array([[77,  0, 20, 22, 85],
       [29,  8, 14, 30,  9],
       [54, 83,  8, 55, 25],
       [35, 92, 45, 87, 79]])

In [48]:
# Reversing with slices still gives a view, so rotated[0, 0] refers to the
# same element as nums[3, 4]; the assignment shows up in nums below.
rotated = nums[::-1, ::-1]
rotated[0,0] = 99
nums

array([[77,  0, 20, 22, 85],
       [29,  8, 14, 30,  9],
       [54, 83,  8, 55, 25],
       [35, 92, 45, 87, 99]])

In [49]:
# .copy() gives rotated its own data, so this assignment leaves nums untouched.
rotated = nums[::-1, ::-1].copy()
rotated[0,0] = 9999
nums

array([[77,  0, 20, 22, 85],
       [29,  8, 14, 30,  9],
       [54, 83,  8, 55, 25],
       [35, 92, 45, 87, 99]])

# Histograms

Here's a cool function for summarizing data.

In [50]:
nums

array([[77,  0, 20, 22, 85],
       [29,  8, 14, 30,  9],
       [54, 83,  8, 55, 25],
       [35, 92, 45, 87, 99]])

In [51]:
# Returns a pair: the count of values falling in each bin, then the bin edges
# (10 bins need 11 edges).
np.histogram(nums, bins=10)

(array([4, 1, 4, 2, 1, 2, 0, 1, 3, 2]),
 array([  0. ,   9.9,  19.8,  29.7,  39.6,  49.5,  59.4,  69.3,  79.2,
         89.1,  99. ]))

In [53]:
# 30 x 60 array of random integers from 0 to 99 (the upper bound 100 is excluded).
speeds = np.random.randint(low=0, high=100, size=(30, 60))

In [54]:
speeds

array([[36, 53, 16, ...,  3, 77, 65],
       [57, 59, 12, ...,  5, 27, 82],
       [55, 93, 16, ..., 83,  5, 48],
       ..., 
       [31, 14,  7, ..., 26, 77, 66],
       [18, 21, 70, ..., 68, 51, 36],
       [73, 75, 28, ..., 19, 60, 61]])

In [57]:
# Mean of column 0: average down axis 0 (one value per column), then take the first.
speeds.mean(axis=0)[0]

53.733333333333334

In [58]:
# Mean of row 29, the last of the 30 rows: average along axis 1 (one value per row), then index.
speeds.mean(axis=1)[29]

46.666666666666664

## List comprehensions (and the rest of Python) & NumPy

In [61]:
# Iterating over a 1-D array yields its elements one at a time.
[x for x in np.random.randint(0, 10, (10,))]

[0, 0, 6, 5, 6, 3, 7, 7, 2, 4]

In [62]:
# Iterating over a 2-D array yields its rows, each one a 1-D array.
[x for x in np.random.randint(0, 10, (10,2))]

[array([7, 8]),
 array([8, 1]),
 array([9, 1]),
 array([5, 7]),
 array([2, 8]),
 array([1, 5]),
 array([1, 9]),
 array([4, 0]),
 array([8, 8]),
 array([2, 9])]

In [63]:
import statistics as st

In [64]:
# statistics.mean from the standard library accepts a 1-D array like any other iterable.
st.mean(np.random.randint(0, 10, (10, )))

4.0

In [65]:
# The built-in sorted() works too, but it returns a plain list rather than an array.
sorted(np.random.randint(0, 10, (10, )))

[0, 1, 2, 2, 3, 4, 7, 8, 8, 9]

In [72]:
# 4 x 5 array of random single-digit integers (0 through 9; 10 is excluded).
a = np.random.randint(low=0, high=10, size=(4, 5))
a

array([[7, 9, 7, 6, 6],
       [1, 2, 3, 2, 2],
       [0, 1, 7, 0, 6],
       [4, 1, 8, 7, 8]])

In [73]:
# ndarray.sort() sorts in place, so a itself is changed;
# axis=0 sorts each column independently, smallest value at the top.
a.sort(axis=0)
a

array([[0, 1, 3, 0, 2],
       [1, 1, 7, 2, 6],
       [4, 2, 7, 6, 6],
       [7, 9, 8, 7, 8]])

# References

* [NumPy](http://www.numpy.org/)
* [Official NumPy tutorial](https://numpy.org/doc/stable/user/quickstart.html)
* [NumPy Example List](http://wiki.scipy.org/Numpy_Example_List)
* [NumPy MedKit slides](http://mentat.za.net/numpy/numpy_advanced_slides/)