In [14]:
import numpy as np

NumPy is useful to us mainly as a way to efficiently do math on collections of data. The core function we will use is `np.array`, which builds a NumPy array from a Python list.

In [27]:
# A 1-D array from a flat list, and a 2-D array from a list of equal-length rows.
list_of_nums = np.array([1, 2, 3, 4, 5])
matrix_of_nums = np.array([
    [1, 2, 3],
    [4, 5, 6],
    [7, 8, 9],
])

In [28]:
list_of_nums

array([1, 2, 3, 4, 5])

In [29]:
matrix_of_nums

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

We can perform mathematical operations over an entire array.

In [30]:
# The scalar is added to every element of the array.
list_of_nums + 4

array([5, 6, 7, 8, 9])

In [31]:
# ** squares each element on its own; this is not a matrix product.
matrix_of_nums ** 2

array([[ 1,  4,  9],
       [16, 25, 36],
       [49, 64, 81]])

We can even perform operations with multiple arrays.

In [32]:
# Arrays of the same shape are combined element by element.
matrix_of_nums + matrix_of_nums

array([[ 2,  4,  6],
       [ 8, 10, 12],
       [14, 16, 18]])

In [33]:
# Element-wise addition of two arrays, not concatenation as with Python lists.
list_of_nums + list_of_nums

array([ 2,  4,  6,  8, 10])

In [34]:
matrix_of_nums

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [35]:
# What is happening here?
# The 3x1 column is stretched across all three columns of the matrix,
# so every element in row i gets the i-th value of the column added to it.
matrix_of_nums + np.array([[1], 
                           [2], 
                           [3]])

array([[ 2,  3,  4],
       [ 6,  7,  8],
       [10, 11, 12]])

In [36]:
# What is happening here?
# The shapes (3, 3) and (3, 2) do not line up, so NumPy refuses to broadcast.
# The error is caught and printed so that "Restart & Run All" can continue
# past this deliberate failure.
try:
    matrix_of_nums + np.array([[1, 2],
                               [2, 3],
                               [3, 4]])
except ValueError as err:
    print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (3,3) (3,2) 

In [37]:
# What is happening here?
# The 1-D array of length 3 is treated as a row and added to every row of the matrix.
matrix_of_nums + np.array([1, 2, 3])

array([[ 2,  4,  6],
       [ 5,  7,  9],
       [ 8, 10, 12]])

What happens when we try to add arrays whose shapes don't line up?

In [38]:
list_of_nums

array([1, 2, 3, 4, 5])

In [39]:
# A length-5 array cannot be lined up against rows of length 3, so this raises.
# The error is caught and printed so that "Restart & Run All" can continue
# past this deliberate failure.
try:
    matrix_of_nums + list_of_nums
except ValueError as err:
    print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (3,3) (5,) 

We can get information about the array.

In [40]:
print(matrix_of_nums.size)   # total number of elements
print(matrix_of_nums.ndim)   # number of dimensions (axes)
print(matrix_of_nums.shape)  # length along each axis
print(matrix_of_nums.dtype)  # type of the stored elements

9
2
(3, 3)
int64


In [41]:
# No dtype is given here, so NumPy infers one from the values (an integer type).
np.array([1, 2, 3]).dtype

dtype('int64')

In [43]:
# Specifying the type explicitly with dtype; every element is stored as a float.
np.array([1.9, 2, 3], dtype="float")

array([ 1.9,  2. ,  3. ])

# Creating different arrays

Often, we want to create arrays of random numbers, or arrays filled with a single value, or arrays whose size we know even though we don't yet know the values.

In [44]:
# The shape is passed as a single tuple; the zeros are floats.
np.zeros((4, 4))

array([[ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.],
       [ 0.,  0.,  0.,  0.]])

In [47]:
# A 3-D shape: 3 blocks, each with 2 rows and 3 columns, filled with 1.0.
np.ones((3, 2, 3))

array([[[ 1.,  1.,  1.],
        [ 1.,  1.,  1.]],

       [[ 1.,  1.,  1.],
        [ 1.,  1.,  1.]],

       [[ 1.,  1.,  1.],
        [ 1.,  1.,  1.]]])

In [48]:
# A 2 x 5 array of random floats; no seed is set, so values differ on every run.
np.random.random((2, 5))

array([[ 0.0607314 ,  0.51070803,  0.77096719,  0.61933802,  0.10720319],
       [ 0.28015117,  0.77308557,  0.05674143,  0.47395258,  0.23388844]])

In [49]:
# 100 evenly spaced values from 0 to 1, with both endpoints included.
np.linspace(0, 1, 100)

array([ 0.        ,  0.01010101,  0.02020202,  0.03030303,  0.04040404,
        0.05050505,  0.06060606,  0.07070707,  0.08080808,  0.09090909,
        0.1010101 ,  0.11111111,  0.12121212,  0.13131313,  0.14141414,
        0.15151515,  0.16161616,  0.17171717,  0.18181818,  0.19191919,
        0.2020202 ,  0.21212121,  0.22222222,  0.23232323,  0.24242424,
        0.25252525,  0.26262626,  0.27272727,  0.28282828,  0.29292929,
        0.3030303 ,  0.31313131,  0.32323232,  0.33333333,  0.34343434,
        0.35353535,  0.36363636,  0.37373737,  0.38383838,  0.39393939,
        0.4040404 ,  0.41414141,  0.42424242,  0.43434343,  0.44444444,
        0.45454545,  0.46464646,  0.47474747,  0.48484848,  0.49494949,
        0.50505051,  0.51515152,  0.52525253,  0.53535354,  0.54545455,
        0.55555556,  0.56565657,  0.57575758,  0.58585859,  0.5959596 ,
        0.60606061,  0.61616162,  0.62626263,  0.63636364,  0.64646465,
        0.65656566,  0.66666667,  0.67676768,  0.68686869,  0.69

In [59]:
# Like range(): the start (10) is included, the stop (20) is not.
np.arange(10, 20)

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

What if I want a range, but in a different shape?

In [60]:
# Start from a flat run of the integers 0 through 19.
my_range = np.arange(0, 20)
my_range

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [61]:
# The same 20 values, rearranged into 4 rows of 5 (4 * 5 = 20 elements).
my_range = my_range.reshape((4, 5))
my_range

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

Can I swap its rows and columns (transpose it)?

In [70]:
# Swapping axes 0 and 1 turns the 4 rows into 4 columns, i.e. a transpose.
new_my_range = my_range.swapaxes(0, 1)
print(new_my_range)

[[ 0  5 10 15]
 [ 1  6 11 16]
 [ 2  7 12 17]
 [ 3  8 13 18]
 [ 4  9 14 19]]


So far, this is _awesome_.

# Calculations

Let's get a random array of integers.

In [103]:
nums = np.random.randint(1, 100, (4, 5))
# NOTE: np.random.randint differs from the standard library's random.randint:
# the upper bound (100) is excluded here, but included there.
# No seed is set, so re-running gives different numbers than the output shown.
nums

array([[11, 90, 21, 54, 32],
       [44,  7, 57, 59, 60],
       [47, 53, 93, 66, 96],
       [40, 49, 84, 38, 12]])

What's the max, min, mean, and sum of these numbers?

In [104]:
# With no axis argument, each statistic is computed over all 20 elements.
print("Max:", nums.max())
print("Min:", nums.min())
print("Avg:", nums.mean())
print("Sum:", nums.sum())
print("Std:", nums.std())
print("Var:", nums.var())

Max: 96
Min: 7
Avg: 50.65
Sum: 1013
Std: 25.9735923584
Var: 674.6275


That's great, but what if I want to know this for each row?

In [105]:
# axis=1 collapses the columns, leaving one value per row (4 results).
print("Max:", nums.max(axis=1))
print("Min:", nums.min(axis=1))
print("Avg:", nums.mean(axis=1))
print("Sum:", nums.sum(axis=1))

Max: [90 60 96 84]
Min: [11  7 47 12]
Avg: [ 41.6  45.4  71.   44.6]
Sum: [208 227 355 223]


And column?

In [106]:
# axis=0 collapses the rows, leaving one value per column (5 results).
print("Max:", nums.max(axis=0))
print("Min:", nums.min(axis=0))
print("Avg:", nums.mean(axis=0))
print("Sum:", nums.sum(axis=0))

Max: [47 90 93 66 96]
Min: [11  7 21 38 12]
Avg: [ 35.5   49.75  63.75  54.25  50.  ]
Sum: [142 199 255 217 200]


Note that `axis=0` runs down the rows and so gives one result per column, while `axis=1` runs across the columns and gives one result per row. This is, to me, the most confusing thing in NumPy.

I can also get the cumulative sum along an axis.

In [107]:
nums

array([[11, 90, 21, 54, 32],
       [44,  7, 57, 59, 60],
       [47, 53, 93, 66, 96],
       [40, 49, 84, 38, 12]])

In [108]:
# Running total down each column; the last row equals nums.sum(axis=0).
nums.cumsum(axis=0)

array([[ 11,  90,  21,  54,  32],
       [ 55,  97,  78, 113,  92],
       [102, 150, 171, 179, 188],
       [142, 199, 255, 217, 200]])

In [109]:
# Running total along each row; the last column equals nums.sum(axis=1).
nums.cumsum(axis=1)

array([[ 11, 101, 122, 176, 208],
       [ 44,  51, 108, 167, 227],
       [ 47, 100, 193, 259, 355],
       [ 40,  89, 173, 211, 223]])

# Indexing

You can get elements out of your NumPy arrays much like you would with a list.

In [110]:
nums

array([[11, 90, 21, 54, 32],
       [44,  7, 57, 59, 60],
       [47, 53, 93, 66, 96],
       [40, 49, 84, 38, 12]])

In [111]:
# Index as [row, column], both counted from zero.
nums[0,0]

11

In [112]:
# Third row (index 2), fourth column (index 3).
nums[2,3]

66

In [115]:
# WHOA: both axes are sliced at once -- rows 0-1 and columns 1-2.
subslice = nums[0:2, 1:3]

In [116]:
subslice

array([[90, 21],
       [ 7, 57]])

Note that this is **not** a copy -- it is a view of the original array. This is very different from lists, where slicing produces a new list.

In [117]:
nums_slice = nums[0:2, 1:3]  # a view onto rows 0-1, columns 1-2 of nums
print("Before change", repr(nums_slice), sep="\n")
nums_slice[0,0] = 0  # writes through to nums[0, 1]
print("Changed", repr(nums_slice), sep="\n")
print("Original", repr(nums), sep="\n")

Before change
array([[90, 21],
       [ 7, 57]])
Changed
array([[ 0, 21],
       [ 7, 57]])
Original
array([[11,  0, 21, 54, 32],
       [44,  7, 57, 59, 60],
       [47, 53, 93, 66, 96],
       [40, 49, 84, 38, 12]])


In [127]:
nums

array([[11,  0, 21, 54, 32],
       [44,  7, 57, 59, 60],
       [47, 53, 93, 66, 96],
       [40, 49, 84, 38, 12]])

In [130]:
# Keep the row order (step 1) and reverse the column order (step -1).
nums[::1, ::-1]

array([[32, 54, 21,  0, 11],
       [60, 59, 57,  7, 44],
       [96, 66, 93, 53, 47],
       [12, 38, 84, 49, 40]])

In [131]:
# Every second row (rows 0 and 2), columns 1 through 3.
nums[0:4:2, 1:4]

array([[ 0, 21, 54],
       [53, 93, 66]])

In [132]:
nums

array([[11,  0, 21, 54, 32],
       [44,  7, 57, 59, 60],
       [47, 53, 93, 66, 96],
       [40, 49, 84, 38, 12]])

In [133]:
rotated = nums[::-1, ::-1]  # both axes reversed; still a view of nums
rotated[0,0] = 99  # lands in the last row, last column of nums
nums

array([[11,  0, 21, 54, 32],
       [44,  7, 57, 59, 60],
       [47, 53, 93, 66, 96],
       [40, 49, 84, 38, 99]])

In [134]:
rotated = nums[::-1, ::-1].copy()  # .copy() gives rotated its own data
rotated[0,0] = 9999  # changes only the copy; nums is untouched
nums

array([[11,  0, 21, 54, 32],
       [44,  7, 57, 59, 60],
       [47, 53, 93, 66, 96],
       [40, 49, 84, 38, 99]])

In [135]:
rotated

array([[9999,   38,   84,   49,   40],
       [  96,   66,   93,   53,   47],
       [  60,   59,   57,    7,   44],
       [  32,   54,   21,    0,   11]])

# Histograms

Here's a cool function for summarizing data.

In [136]:
nums

array([[11,  0, 21, 54, 32],
       [44,  7, 57, 59, 60],
       [47, 53, 93, 66, 96],
       [40, 49, 84, 38, 99]])

In [138]:
import matplotlib.pyplot as plt

In [139]:
%matplotlib inline

In [150]:
# Returns (counts, bin edges): 20 counts and the 21 edges that bound the bins,
# running from the smallest value in nums to the largest.
np.histogram(nums, bins=20)

(array([1, 1, 1, 0, 1, 0, 1, 1, 2, 2, 2, 2, 1, 1, 0, 0, 1, 0, 1, 2]),
 array([  0.  ,   4.95,   9.9 ,  14.85,  19.8 ,  24.75,  29.7 ,  34.65,
         39.6 ,  44.55,  49.5 ,  54.45,  59.4 ,  64.35,  69.3 ,  74.25,
         79.2 ,  84.15,  89.1 ,  94.05,  99.  ]))

In [151]:
# 30 rows by 60 columns of random integers; no seed is set, so values differ per run.
speeds = np.random.randint(0, 100, (30, 60))

In [152]:
speeds

array([[50, 34, 92, ..., 27, 45, 55],
       [ 0, 98, 10, ..., 25, 34, 49],
       [21, 68,  0, ..., 44, 77, 18],
       ..., 
       [52, 47, 16, ..., 84,  8, 70],
       [92, 80, 83, ..., 99, 48, 10],
       [62, 27, 30, ..., 36, 22, 92]])

In [153]:
# Mean of the first column: axis=0 averages over the 30 rows, [0] picks column 0.
speeds.mean(axis=0)[0]

53.200000000000003

In [154]:
# Mean of the last row: axis=1 averages over the 60 columns, [29] picks row 29.
speeds.mean(axis=1)[29]

49.43333333333333

## List comprehensions (and the rest of Python) & NumPy

In [155]:
# Iterating over a 1-D array yields its individual elements.
[x for x in np.random.randint(0, 10, (10,))]

[5, 9, 9, 6, 4, 1, 2, 1, 5, 7]

In [156]:
# Iterating over a 2-D array yields its rows, each one a 1-D array.
[x for x in np.random.randint(0, 10, (10,2))]

[array([0, 5]),
 array([4, 8]),
 array([8, 8]),
 array([7, 6]),
 array([0, 8]),
 array([0, 8]),
 array([4, 3]),
 array([6, 2]),
 array([9, 1]),
 array([9, 8])]

In [157]:
import statistics as st

In [158]:
# The standard-library statistics module accepts a NumPy array like any other iterable.
st.mean(np.random.randint(0, 10, (10, )))

6.0

In [159]:
# The built-in sorted() also works on a 1-D array and returns a plain list.
sorted(np.random.randint(0, 10, (10, )))

[0, 0, 0, 1, 1, 5, 7, 8, 8, 9]

In [163]:
# A 4 x 5 array of random single-digit integers to sort below.
a = np.random.randint(0, 10, (4,5))
a

array([[3, 8, 1, 9, 6],
       [8, 3, 9, 4, 7],
       [7, 6, 4, 1, 6],
       [8, 2, 0, 5, 4]])

In [165]:
# ndarray.sort() sorts in place and returns None, so `a` itself is modified.
# Sort within each row first, then within each column: these are the two steps
# that turn the `a` displayed above into the array shown below. (Sorting along
# axis 0 alone does not produce that output.)
a.sort(axis=1)
a.sort(axis=0)
a

array([[0, 2, 4, 5, 7],
       [1, 3, 6, 6, 8],
       [1, 4, 6, 8, 9],
       [3, 4, 7, 8, 9]])

# References

* [NumPy](http://www.numpy.org/)
* [Official NumPy tutorial](http://www.scipy.org/Tentative_NumPy_Tutorial)
* [NumPy Example List](http://wiki.scipy.org/Numpy_Example_List)
* [NumPy MedKit slides](http://mentat.za.net/numpy/numpy_advanced_slides/)