# Numpy

In [1]:
import numpy as np

In [2]:
a = np.array([1, 2, 3])
a

array([1, 2, 3])

In [3]:
a = np.arange(5)
a

array([0, 1, 2, 3, 4])

How is an `np.array` better than a list?
1. Space
1. Speed

Arrays have the concept of a type, and this allows data to be placed closer together. We only need to use enough bits to hold each value and nothing more; no per-element metadata is necessary. Also, since arrays have this notion of a type, numpy can use specialized low-level functions for them that are much faster than Python's high-level equivalents.

In [4]:
import timeit

# Baseline: sum 500 random floats with a pure-Python generator expression.
# timeit runs the statement `number` times and returns the total elapsed seconds.
timeit.timeit('sum(random.random() for i in range(500))', setup='import random', number=10000)

0.7198024999815971

In [5]:
# Same workload, vectorized: numpy generates and sums the 500 values without a Python-level loop.
timeit.timeit('np.random.rand(500).sum()', setup='import numpy as np', number=10000)

0.11746320000384003

Don't use Python loops! Use numpy functions and methods instead, in particular ufuncs (universal functions), which operate element-wise on whole arrays.

In [6]:
# You can create arrays that mix types, but you lose speed in numpy functions:
# the array falls back to dtype=object (see the output below).
a = np.array([1, None, False])  
a

array([1, None, False], dtype=object)

In [7]:
a.dtype

dtype('O')

In [8]:
np.random.rand(5).dtype

dtype('float64')

In [9]:
a = np.array([1.0, 2.0, 3.0])  # What if we want to specify the type of the data?
a

array([ 1.,  2.,  3.])

In [10]:
a.dtype

dtype('float64')

In [11]:
b = np.array([1.0, 2.0, 3.0], dtype='int8')
b

array([1, 2, 3], dtype=int8)

In [12]:
a.nbytes

24

In [13]:
b.nbytes  # Look at those savings!

3

In [15]:
# What can you do with an array?
a = np.array([1, 2, 3])
b = np.array([4, 5, 6])

# Basic element-wise arithmetic: each operator is applied to matching pairs of
# entries (so `*` on two arrays is an element-wise product, not a dot product).
a + b

array([5, 7, 9])

In [16]:
a - b

array([-3, -3, -3])

In [17]:
a * b

array([ 4, 10, 18])

In [18]:
a ** b

array([  1,  32, 729])

In [19]:
a[:2]  # Slicing just like Python lists

array([1, 2])

### Multidimensional

In [20]:
a = np.arange(9)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [21]:
a.reshape(3, 3)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [22]:
a.reshape(3, 3).flatten()  # Flatten will take us back

array([0, 1, 2, 3, 4, 5, 6, 7, 8])

In [23]:
a.shape

(9,)

In [24]:
a.reshape(3, 3).shape

(3, 3)

In [25]:
np.zeros((3, 3))

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [26]:
np.ones((3, 3))

array([[ 1.,  1.,  1.],
       [ 1.,  1.,  1.],
       [ 1.,  1.,  1.]])

In [27]:
np.eye(3)  # Notice, no tuple needed

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [28]:
np.linspace(start=1, stop=10, num=77)  # Inclusive of endpoints

array([  1.        ,   1.11842105,   1.23684211,   1.35526316,
         1.47368421,   1.59210526,   1.71052632,   1.82894737,
         1.94736842,   2.06578947,   2.18421053,   2.30263158,
         2.42105263,   2.53947368,   2.65789474,   2.77631579,
         2.89473684,   3.01315789,   3.13157895,   3.25      ,
         3.36842105,   3.48684211,   3.60526316,   3.72368421,
         3.84210526,   3.96052632,   4.07894737,   4.19736842,
         4.31578947,   4.43421053,   4.55263158,   4.67105263,
         4.78947368,   4.90789474,   5.02631579,   5.14473684,
         5.26315789,   5.38157895,   5.5       ,   5.61842105,
         5.73684211,   5.85526316,   5.97368421,   6.09210526,
         6.21052632,   6.32894737,   6.44736842,   6.56578947,
         6.68421053,   6.80263158,   6.92105263,   7.03947368,
         7.15789474,   7.27631579,   7.39473684,   7.51315789,
         7.63157895,   7.75      ,   7.86842105,   7.98684211,
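         8.10526316,   8.22368421,   8.34210526,   8.46052632,
         8.57894737,   8.69736842,   8.81578947,   8.93421053,
         9.05263158,   9.17105263,   9.28947368,   9.40789474,
         9.52631579,   9.64473684,   9.76315789,   9.88157895,
        10.        ])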

In [29]:
np.random.random((5, 5))

array([[ 0.16192397,  0.66849997,  0.34264977,  0.24999774,  0.23921711],
       [ 0.3448451 ,  0.19006817,  0.72652024,  0.6223251 ,  0.82588727],
       [ 0.86601623,  0.65941253,  0.527446  ,  0.48835736,  0.50980748],
       [ 0.84842933,  0.63303864,  0.50676617,  0.93235437,  0.32646734],
       [ 0.27871191,  0.86584952,  0.69996612,  0.98176463,  0.14818985]])

In [30]:
A = np.eye(3)
A

array([[ 1.,  0.,  0.],
       [ 0.,  1.,  0.],
       [ 0.,  0.,  1.]])

In [31]:
B = A * 5
B

array([[ 5.,  0.,  0.],
       [ 0.,  5.,  0.],
       [ 0.,  0.,  5.]])

In [32]:
A + B

array([[ 6.,  0.,  0.],
       [ 0.,  6.,  0.],
       [ 0.,  0.,  6.]])

In [33]:
# How to join two arrays
a = np.arange(5)
a

array([0, 1, 2, 3, 4])

In [34]:
b = np.arange(5, 10)
b

array([5, 6, 7, 8, 9])

In [35]:
a + b  # Does not join arrays. It adds them.

array([ 5,  7,  9, 11, 13])

In [36]:
np.concatenate((a, b))

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [37]:
A = np.zeros((3, 1))
A

array([[ 0.],
       [ 0.],
       [ 0.]])

In [38]:
B = np.ones((3, 1))
B

array([[ 1.],
       [ 1.],
       [ 1.]])

In [39]:
np.concatenate((A, B))  # joins on outermost axis, axis=0, which is equivalent to adding rows

array([[ 0.],
       [ 0.],
       [ 0.],
       [ 1.],
       [ 1.],
       [ 1.]])

In [40]:
np.concatenate((A, B), axis=1)  # Adds value to same row, adding a column

array([[ 0.,  1.],
       [ 0.,  1.],
       [ 0.,  1.]])

In [41]:
# An out-of-bounds axis raises an error. Catch it and print it so that
# "Restart & Run All" does not halt at this cell. Newer numpy raises AxisError,
# which subclasses both IndexError and ValueError, so this tuple covers old and
# new versions alike.
try:
    np.concatenate((A, B), axis=2)
except (IndexError, ValueError) as err:
    print("{}: {}".format(type(err).__name__, err))

IndexError: axis 2 out of bounds [0, 2)

In [42]:
c = np.arange(5, 10)
c

array([5, 6, 7, 8, 9])

In [43]:
# Cut `c` at indices 2 and 3, producing the pieces c[:2], c[2:3] and c[3:].
splits = np.split(c, [2, 3])
x, y, z = splits
splits

[array([5, 6]), array([7]), array([8, 9])]

In [44]:
x

array([5, 6])

In [45]:
y

array([7])

In [46]:
z

array([8, 9])

### Slicing

In [47]:
A = np.arange(9).reshape((3, 3))
A

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

In [48]:
# Let's get the second row

A[1, :]  

array([3, 4, 5])

The 1 selects the second row (indexing is zero-based in Python) and the ":" selects all columns, so together they collect the entire second row.

In [49]:
A[:, 2]  # Every row value from the "2" column

array([2, 5, 8])

In [50]:
# Arrays are not limited to two dimensions: here 27 values fill a 3x3x3 block.
A = np.reshape(np.arange(27), (3, 3, 3))
A

array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8]],

       [[ 9, 10, 11],
        [12, 13, 14],
        [15, 16, 17]],

       [[18, 19, 20],
        [21, 22, 23],
        [24, 25, 26]]])

In [51]:
A[:, 1, 1]  # Get me the "middle item" from each matrix. Pierce through the third dimension.

array([ 4, 13, 22])

In [52]:
# What if we have a 1-d array and we need it to be a column?
a = np.arange(3)
a

array([0, 1, 2])

In [53]:
a[:, np.newaxis]  # Take everything in "a" and spread it over the new axis

array([[0],
       [1],
       [2]])

Index the array with `np.newaxis` in the position where the new length-1 axis should appear in the result's shape. For example, `a[:, np.newaxis]` turns shape (3,) into (3, 1), while `a[np.newaxis, :]` gives (1, 3). (This takes a moment to get used to.)

In [54]:
a[np.newaxis, :]

array([[0, 1, 2]])

In [55]:
a[:, np.newaxis, np.newaxis]

array([[[0]],

       [[1]],

       [[2]]])

### Basic Universal Functions

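Back to non-looping functions ;) (Strictly speaking, `np.std`, `max` and `sort` below are vectorized array functions rather than ufuncs, but the lesson is the same: let numpy do the looping.)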

In [56]:
# 3x3 matrix holding 1..9, filled row by row.
A = np.reshape(np.arange(1, 10), (3, 3))
A

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [57]:
np.std(A)  # Standard deviation of whole matrix

2.5819888974716112

In [58]:
np.std(A, axis=1)

array([ 0.81649658,  0.81649658,  0.81649658])

From the docs:
> Axis or axes along which the standard deviation is computed. The default is to compute the standard deviation of the flattened array.

In [59]:
a = np.random.random_sample(5)
a

array([ 0.35620773,  0.03257056,  0.6087373 ,  0.28357976,  0.61448143])

In [60]:
a.max(), a.argmax()

(0.61448142737571054, 4)

In [61]:
a.min(), a.argmin()

(0.032570562066083553, 1)

In [62]:
a.sort()  # In-place sort. Will change the original array.
a

array([ 0.03257056,  0.28357976,  0.35620773,  0.6087373 ,  0.61448143])

In [63]:
# Use np.sort to create a sorted copy and leave the original array untouched.
# Note: `a` was already sorted in place by the previous cell, so `a` and `b` print identically here.
b = np.sort(a)
b 

array([ 0.03257056,  0.28357976,  0.35620773,  0.6087373 ,  0.61448143])

In [64]:
a

array([ 0.03257056,  0.28357976,  0.35620773,  0.6087373 ,  0.61448143])

### Bools

In [65]:
a = np.array([True, True, True])
a

array([ True,  True,  True], dtype=bool)

In [66]:
b = np.array([True, True, False])
b

array([ True,  True, False], dtype=bool)

In [67]:
np.all(a)  # All values are truthy

True

In [68]:
np.all(b)

False

In [69]:
np.any(b)  # Any value is truthy

True

In [70]:
a = np.arange(5)
a

array([0, 1, 2, 3, 4])

In [71]:
a == 2

array([False, False,  True, False, False], dtype=bool)

In [72]:
bool_array = a == 2
bool_array

array([False, False,  True, False, False], dtype=bool)

In [73]:
a[bool_array]  # New array with just entries where a == 2

array([2])

In [74]:
a[a == 2]  # More common

array([2])

In [75]:
a[(a > 0) & (a < 4)]  # Boolean "and"

array([1, 2, 3])

In [76]:
a[(a < 1) | (a > 3)]  # Boolean "or"

array([0, 4])

In [77]:
a[~(a == 2)]  # Boolean "not"

array([0, 1, 3, 4])

### Broadcasting

Broadcasting is what happens when two numpy arrays of different shapes interact with each other.

In [78]:
a = np.arange(3)
a

array([0, 1, 2])

In [79]:
a * 2  # Multiplication is spread out over whole array

array([0, 2, 4])

In [80]:
# Build the column vector in a single expression: indexing with np.newaxis
# appends a length-1 axis, giving shape (3, 1).
b = np.arange(3)[:, np.newaxis]
b

array([[0],
       [1],
       [2]])

In [81]:
a * b

array([[0, 0, 0],
       [0, 1, 2],
       [0, 2, 4]])

Both a and b are stretched out to match the shape of the other array. Conceptually, "a" is copied twice to form a matrix of three identical rows, and "b" is copied twice to form a matrix of three identical columns. Then the multiplication is performed element by element. (numpy does not actually make these copies in memory; it just behaves as if it had.)