In [18]:
import numpy as np

NumPy is useful to us mainly as a way to efficiently do math on collections of data. The core function we will use is np.array, which builds a NumPy array (an `ndarray`) from a Python list.

In [19]:
# A 1-D array built from a flat Python list.
list_of_nums = np.array([1, 2, 3, 4, 5])

# A 2-D array: each inner list becomes one row of the 3x3 matrix.
matrix_of_nums = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

In [20]:
list_of_nums

array([1, 2, 3, 4, 5])

In [21]:
matrix_of_nums

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

We can perform mathematical operations over an entire array.

In [22]:
list_of_nums + 4

array([5, 6, 7, 8, 9])

In [23]:
matrix_of_nums ** 2

array([[ 1,  4,  9],
       [16, 25, 36],
       [49, 64, 81]])

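We can even perform operations with multiple arrays. Arrays of the same shape are combined element by element. Arrays of different shapes are *broadcast* against each other: comparing shapes from the last dimension backwards, each pair of dimensions must either match or one of them must be 1. If they don't, NumPy raises a `ValueError`.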

In [24]:
matrix_of_nums + matrix_of_nums

array([[ 2,  4,  6],
       [ 8, 10, 12],
       [14, 16, 18]])

In [25]:
list_of_nums + list_of_nums

array([ 2,  4,  6,  8, 10])

In [26]:
# Shapes (5,) and (3, 3) cannot be broadcast together: the trailing
# dimensions (5 vs 3) differ and neither is 1, so NumPy raises ValueError.
# The error is the point of this cell, so catch and display it instead of
# letting it halt a "Restart & Run All".
try:
    list_of_nums + matrix_of_nums
except ValueError as err:
    print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (5,) (3,3) 

In [27]:
# Three elements, matching the length of each row of the 3x3 matrix,
# so this array can be broadcast across the matrix's rows.
list_of_3 = np.array([1, 2, 3])

In [28]:
list_of_3 + matrix_of_nums

array([[ 2,  4,  6],
       [ 5,  7,  9],
       [ 8, 10, 12]])

In [29]:
# A (3, 1) column vector: its single column is stretched across all three
# columns of the matrix, so row i of the matrix gets the i-th value added.
matrix_of_nums + np.array([[1], [2], [3]])

array([[ 2,  3,  4],
       [ 6,  7,  8],
       [10, 11, 12]])

In [30]:
# Shapes (3, 3) and (3, 2) cannot be broadcast together: the trailing
# dimensions (3 vs 2) differ and neither is 1, so NumPy raises ValueError.
# The error is the point of this cell, so catch and display it instead of
# letting it halt a "Restart & Run All".
try:
    matrix_of_nums + np.array([[1, 2],
                               [3, 4],
                               [5, 6]])
except ValueError as err:
    print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (3,3) (3,2) 

In [31]:
# Swapping the operands does not help: shapes (3, 3) and (5,) still have
# incompatible trailing dimensions (3 vs 5), so NumPy raises ValueError.
# The error is the point of this cell, so catch and display it instead of
# letting it halt a "Restart & Run All".
try:
    matrix_of_nums + list_of_nums
except ValueError as err:
    print("ValueError:", err)

ValueError: operands could not be broadcast together with shapes (3,3) (5,) 

In [33]:
# Report the basic metadata of the matrix, one attribute per output line:
# total element count, number of dimensions, shape tuple, and element dtype.
print(
    matrix_of_nums.size,
    matrix_of_nums.ndim,
    matrix_of_nums.shape,
    matrix_of_nums.dtype,
    sep="\n",
)

9
2
(3, 3)
int64


In [34]:
np.array([1,2,3]).dtype

dtype('int64')

In [35]:
np.array([1,2,3], dtype="float")

array([ 1.,  2.,  3.])

# Creating different arrays

Often, we want to create arrays of random numbers, or arrays filled with a single number, or maybe we don't know the values yet but we do know the size of the array.

In [43]:
np.zeros((2,3))

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [42]:
np.ones((3,2,3))

array([[[ 1.,  1.,  1.],
        [ 1.,  1.,  1.]],

       [[ 1.,  1.,  1.],
        [ 1.,  1.,  1.]],

       [[ 1.,  1.,  1.],
        [ 1.,  1.,  1.]]])

In [44]:
np.random.random((2,5))

array([[ 0.85780986,  0.79988521,  0.51090928,  0.27808698,  0.81135321],
       [ 0.45111539,  0.16670683,  0.62752539,  0.40818067,  0.37111249]])

In [46]:
np.linspace(0, 1, 20)

array([ 0.        ,  0.05263158,  0.10526316,  0.15789474,  0.21052632,
        0.26315789,  0.31578947,  0.36842105,  0.42105263,  0.47368421,
        0.52631579,  0.57894737,  0.63157895,  0.68421053,  0.73684211,
        0.78947368,  0.84210526,  0.89473684,  0.94736842,  1.        ])

In [47]:
np.arange(10, 15)

array([10, 11, 12, 13, 14])

In [48]:
my_range = np.arange(20)
my_range

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19])

In [49]:
my_range = my_range.reshape((4,5))
my_range

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [50]:
my_range.swapaxes(0, 1)

array([[ 0,  5, 10, 15],
       [ 1,  6, 11, 16],
       [ 2,  7, 12, 17],
       [ 3,  8, 13, 18],
       [ 4,  9, 14, 19]])

# Calculations
Let's get a random array of integers.

In [53]:
# Draw a 4x5 grid of random integers from 1 up to (but not including) 100.
# NOTE(review): no seed is set in the visible cells, so the values change on
# every run and the outputs shown in this notebook come from one particular run.
nums = np.random.randint(low=1, high=100, size=(4, 5))

In [54]:
nums

array([[98, 46, 56, 45, 68],
       [53,  8, 19, 78, 37],
       [31, 41, 72, 66, 10],
       [53, 27, 91, 47, 83]])

What's the max, min, mean, and sum of these numbers?

In [55]:
print("Max:", nums.max())
print("Min:", nums.min())
print("Mean:", nums.mean())
print("Sum:", nums.sum())

Max: 98
Min: 8
Mean: 51.45
Sum: 1029


That's great, but what if I want to know this for each row? Pass `axis=1` to get one result per row.

In [56]:
print("Max:", nums.max(axis=1))
print("Min:", nums.min(axis=1))
print("Mean:", nums.mean(axis=1))
print("Sum:", nums.sum(axis=1))

Max: [98 78 72 91]
Min: [45  8 10 27]
Mean: [ 62.6  39.   44.   60.2]
Sum: [313 195 220 301]


And for each column? Pass `axis=0` to get one result per column.

In [57]:
print("Max:", nums.max(axis=0))
print("Min:", nums.min(axis=0))
print("Mean:", nums.mean(axis=0))
print("Sum:", nums.sum(axis=0))

Max: [98 46 91 78 83]
Min: [31  8 19 45 10]
Mean: [ 58.75  30.5   59.5   59.    49.5 ]
Sum: [235 122 238 236 198]


In [58]:
nums

array([[98, 46, 56, 45, 68],
       [53,  8, 19, 78, 37],
       [31, 41, 72, 66, 10],
       [53, 27, 91, 47, 83]])

In [61]:
nums.cumsum(axis=0)

array([[ 98,  46,  56,  45,  68],
       [151,  54,  75, 123, 105],
       [182,  95, 147, 189, 115],
       [235, 122, 238, 236, 198]])

In [62]:
nums.cumsum(axis=1)

array([[ 98, 144, 200, 245, 313],
       [ 53,  61,  80, 158, 195],
       [ 31,  72, 144, 210, 220],
       [ 53,  80, 171, 218, 301]])

# Indexing
You can get elements out of your NumPy arrays much like you would with a list.

In [63]:
nums

array([[98, 46, 56, 45, 68],
       [53,  8, 19, 78, 37],
       [31, 41, 72, 66, 10],
       [53, 27, 91, 47, 83]])

In [64]:
nums[0,0]

98

In [65]:
nums[0,1]

46

In [66]:
nums[1,0]

53

In [67]:
nums[0:2, 1:3]

array([[46, 56],
       [ 8, 19]])

Note that this is __not__ a copy -- it is a view of the original array, so changes made through the slice also change the original. This is very different from lists, where slicing creates a new list.

In [68]:
num_slice = nums[0:2, 1:3]
num_slice

array([[46, 56],
       [ 8, 19]])

In [69]:
num_slice[0,0] = 0

In [70]:
num_slice

array([[ 0, 56],
       [ 8, 19]])

In [71]:
nums

array([[98,  0, 56, 45, 68],
       [53,  8, 19, 78, 37],
       [31, 41, 72, 66, 10],
       [53, 27, 91, 47, 83]])

In [72]:
nums[::-1, :]

array([[53, 27, 91, 47, 83],
       [31, 41, 72, 66, 10],
       [53,  8, 19, 78, 37],
       [98,  0, 56, 45, 68]])

In [73]:
nums[::-1, ::-1]

array([[83, 47, 91, 27, 53],
       [10, 66, 72, 41, 31],
       [37, 78, 19,  8, 53],
       [68, 45, 56,  0, 98]])

In [74]:
nums[::2, :]

array([[98,  0, 56, 45, 68],
       [31, 41, 72, 66, 10]])

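# Histograms
Here's a cool function for summarizing data. `np.histogram` returns two arrays: the number of values that fall in each bin, and the bin edges (one more edge than there are bins).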

In [75]:
nums

array([[98,  0, 56, 45, 68],
       [53,  8, 19, 78, 37],
       [31, 41, 72, 66, 10],
       [53, 27, 91, 47, 83]])

In [76]:
np.histogram(nums, bins=10)

(array([2, 2, 1, 2, 3, 3, 2, 2, 1, 2]),
 array([  0. ,   9.8,  19.6,  29.4,  39.2,  49. ,  58.8,  68.6,  78.4,
         88.2,  98. ]))

In [77]:
speeds = np.random.randint(0, 100, (30, 60))
speeds

array([[57,  3, 43, ..., 68, 57, 28],
       [50, 11, 76, ...,  9, 59, 57],
       [39, 68, 99, ..., 63, 50, 55],
       ..., 
       [81, 95, 16, ..., 16, 36, 54],
       [24, 23, 81, ..., 81, 57,  1],
       [28, 14, 95, ..., 13, 68, 49]])

In [79]:
speeds.mean(axis=0)[0]

53.700000000000003

In [80]:
speeds.mean(axis=1)[29]

49.583333333333336

# List comprehensions

In [81]:
[x for x in np.random.randint(0, 10, (10,))]

[2, 2, 2, 8, 2, 8, 1, 5, 5, 6]

In [82]:
[x for x in np.random.randint(0, 10, (10, 2))]

[array([2, 8]),
 array([7, 4]),
 array([2, 2]),
 array([7, 7]),
 array([7, 1]),
 array([7, 3]),
 array([3, 0]),
 array([6, 7]),
 array([2, 4]),
 array([9, 4])]

In [83]:
np.random.randint(0, 10, (10, 2))

array([[2, 0],
       [1, 2],
       [1, 8],
       [2, 8],
       [6, 3],
       [0, 5],
       [8, 3],
       [3, 0],
       [8, 4],
       [8, 8]])

In [84]:
import statistics as st

In [85]:
st.mean(np.random.randint(0, 10, (10,)))

4.2999999999999998

In [86]:
sorted(np.random.randint(0, 10, (10, )))

[0, 0, 1, 2, 2, 2, 2, 3, 8, 9]

In [87]:
a = np.random.randint(0, 10, (4,5))
a

array([[9, 6, 4, 9, 3],
       [4, 8, 9, 3, 1],
       [7, 4, 8, 9, 7],
       [8, 6, 8, 0, 4]])

In [88]:
a.sort(axis=0)

In [89]:
a

array([[4, 4, 4, 0, 1],
       [7, 6, 8, 3, 3],
       [8, 6, 8, 9, 4],
       [9, 8, 9, 9, 7]])

In [90]:
a.sort(axis=1)

In [91]:
a

array([[0, 1, 4, 4, 4],
       [3, 3, 6, 7, 8],
       [4, 6, 8, 8, 9],
       [7, 8, 9, 9, 9]])

In [92]:
a = np.random.randint(0, 10, (4,5))

In [93]:
a

array([[3, 9, 9, 2, 5],
       [8, 4, 9, 6, 7],
       [4, 2, 4, 0, 6],
       [1, 8, 0, 5, 1]])

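Calling `.sort()` on an np.array sorts it in place along the last axis by default (`axis=-1`). For a 2-D array like this one that is `axis=1`, so each row is sorted independently.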

In [94]:
a.sort()

In [95]:
a

array([[2, 3, 5, 9, 9],
       [4, 6, 7, 8, 9],
       [0, 2, 4, 4, 6],
       [0, 1, 1, 5, 8]])

In [96]:
a = np.random.randint(0, 10, (4,5))

In [97]:
a

array([[0, 6, 2, 3, 4],
       [8, 5, 4, 9, 6],
       [6, 2, 2, 7, 6],
       [1, 7, 1, 2, 2]])

In [98]:
# The built-in sorted() iterates over the rows of `a` and compares them with
# `<`. Comparing two rows yields an element-wise boolean array, which has no
# single truth value, so a ValueError is raised.
# The error is the point of this cell, so catch and display it instead of
# letting it halt a "Restart & Run All".
try:
    sorted(a)
except ValueError as err:
    print("ValueError:", err)

ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()

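The built-in `sorted` doesn't know how to order an array with more than one dimension: it iterates over the rows and compares them with `<`, but comparing two rows produces an array of booleans rather than a single `True`/`False`, hence the `ValueError`. Use `np.sort(a, axis=...)` or `a.sort(axis=...)` instead.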

# References
* [NumPy](https://numpy.org/)
* [NumPy Quickstart Tutorial (formerly the "Tentative NumPy Tutorial")](https://numpy.org/doc/stable/user/quickstart.html)
* [NumPy Example List](http://wiki.scipy.org/Numpy_Example_List)
* [NumPy MedKit Slides](http://mentat.za.net/numpy/numpy_advanced_slides/)