# Numpy

Use numpy because:

- it's compact
- it's fast
- it matches the way we think about math

# Why not just use Python?

![Floating Point](data/img/C-Python-Float.png)

![List](data/img/Python-List-Float.png)

![Numpy Array](data/img/Numpy-Array.png)

In [None]:
!pip install numpy

# "How fast is it?"

In [1]:
import numpy as np

np.random.seed(0)

def reciprocate(values):
    """Compute the reciprocal of every item in ``values``, one item at a time.

    The work is done in an explicit Python-level loop rather than with a
    single array expression such as ``1.0 / values``.

    Args:
        values: A sized iterable of numbers (for example a 1-D array or list).

    Returns:
        A new NumPy array with ``len(values)`` entries, where entry ``i``
        holds ``1.0`` divided by the ``i``-th item of ``values``.
    """
    count = len(values)
    # np.empty leaves the buffer uninitialized; every slot is assigned below.
    reciprocals = np.empty(count)
    for position, item in enumerate(values):
        reciprocals[position] = 1.0 / item
    return reciprocals

reciprocate(np.random.randint(1, 10, 10))

array([0.16666667, 1.        , 0.25      , 0.25      , 0.125     ,
       0.25      , 0.16666667, 0.33333333, 0.2       , 0.125     ])

In [2]:
xs = np.random.randint(1, 10, 1_000_000)

In [3]:
%timeit reciprocate(xs)

1.73 s ± 33.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [4]:
%timeit 1.0 / xs

1.54 ms ± 11.3 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


# How compact is it?

In [5]:
!pip install pympler

Collecting pympler
[?25l  Downloading https://files.pythonhosted.org/packages/26/75/d38ea74acc62acbd4609f3f02bb93f30342a7fb4246754f4e2becd616557/Pympler-0.8.tar.gz (175kB)
[K     |████████████████████████████████| 184kB 4.9MB/s eta 0:00:01
[?25hBuilding wheels for collected packages: pympler
  Building wheel for pympler (setup.py) ... [?25ldone
[?25h  Created wheel for pympler: filename=Pympler-0.8-cp37-none-any.whl size=164712 sha256=6e02379d9a868bf48a8ff57c6ed817ed0cc3538bd6e240484a46dbe8cfef8a73
  Stored in directory: /home/rick446/.cache/pip/wheels/6c/61/cc/4bdf1e8c8b1c04d8104322eb2508b6a21d4d3a4d1b44fdd510
Successfully built pympler
Installing collected packages: pympler
Successfully installed pympler-0.8


In [6]:
from pympler.asizeof import asizeof

In [7]:
asizeof(1)

32

In [8]:
asizeof([])

72

In [9]:
np_array_size = asizeof(xs)
np_array_size

8000096

In [12]:
import random
# Build a plain Python list of one million random ints and record the size
# that asizeof reports for it, for comparison with np_array_size.
# NOTE(review): the lower bound of 256000 presumably keeps the values outside
# CPython's small-int cache so each element is its own object — confirm.
py_list_size = asizeof([random.randint(256000, 10000000) for i in range(1_000_000)])

In [13]:
py_list_size

40697472

In [14]:
py_list_size / np_array_size

5.0871229545245455

In [15]:
from collections import deque, namedtuple

deck = deque([])
Point = namedtuple('Point', 'x y')

In [16]:
asizeof([])

72

In [17]:
asizeof(())

56

In [18]:
asizeof((1,2))

136

In [19]:
asizeof(Point(1,2))

136

In [20]:
asizeof(deck)

640

In [21]:
asizeof({})

248

In [22]:
asizeof(set())

232

# "Rounding"

In [23]:
x = 1.234
np.trunc(x)

1.0

In [24]:
np.trunc(-x)

-1.0

In [25]:
np.floor(x)

1.0

In [26]:
np.floor(-x)

-2.0

In [27]:
np.ceil(x)

2.0

In [28]:
np.ceil(-x)

-1.0

In [29]:
np.round(x)

1.0

In [30]:
np.round(-x)

-1.0

In [31]:
np.round([1.5, 2.5])

array([2., 2.])

# Creating Numpy arrays

In [32]:
xs = np.array(range(5))
xs, xs.dtype

(array([0, 1, 2, 3, 4]), dtype('int64'))

In [33]:
xs = np.array([1,2,3,4,3.14])
xs, xs.dtype

(array([1.  , 2.  , 3.  , 4.  , 3.14]), dtype('float64'))

In [34]:
xs = np.array(range(5), dtype=float)
xs, xs.dtype

(array([0., 1., 2., 3., 4.]), dtype('float64'))

In [35]:
xs = np.array(range(5), dtype=np.int8)
xs, xs.dtype

(array([0, 1, 2, 3, 4], dtype=int8), dtype('int8'))

In [36]:
# np.intp is the pointer-sized signed integer type. Its former alias np.int0
# was deprecated in NumPy 1.24 and removed in NumPy 2.0.
xs = np.array([0, 0, 1, 1], dtype=np.intp)
xs, xs.dtype

(array([0, 0, 1, 1]), dtype('int64'))

In [37]:
%timeit np.empty(1_000_000)

819 ns ± 26.8 ns per loop (mean ± std. dev. of 7 runs, 1000000 loops each)


In [38]:
%timeit np.zeros(1_000_000)

159 µs ± 2.33 µs per loop (mean ± std. dev. of 7 runs, 10000 loops each)


Multi-dimensional arrays

In [39]:
np.array([
    [1,2,3],
    [4,5,6]
])

array([[1, 2, 3],
       [4, 5, 6]])

Quick array creation convenience methods

In [40]:
np.zeros(20, dtype=np.int8)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0],
      dtype=int8)

In [41]:
# np.longdouble is the portable name for the platform's extended-precision
# float; np.float128 is only defined on platforms where it is 128 bits wide.
np.ones((3, 4), dtype=np.longdouble)

array([[1., 1., 1., 1.],
       [1., 1., 1., 1.],
       [1., 1., 1., 1.]], dtype=float128)

In [42]:
# np.longdouble is the portable name for the platform's extended-precision
# float; np.float128 is only defined on platforms where it is 128 bits wide.
np.ones((3, 4, 5), dtype=np.longdouble)

array([[[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]],

       [[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]],

       [[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]]], dtype=float128)

In [43]:
xs = np.full((3,4), np.pi)
xs

array([[3.14159265, 3.14159265, 3.14159265, 3.14159265],
       [3.14159265, 3.14159265, 3.14159265, 3.14159265],
       [3.14159265, 3.14159265, 3.14159265, 3.14159265]])

In [44]:
xs.shape

(3, 4)

# Creating ranges of array values

In [45]:
np.arange(10)

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [46]:
np.arange(10.0)

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [47]:
np.arange(10, 20)

array([10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [48]:
np.arange(10, 50, 5)

array([10, 15, 20, 25, 30, 35, 40, 45])

In [49]:
np.arange(10, 50, 5.5)

array([10. , 15.5, 21. , 26.5, 32. , 37.5, 43. , 48.5])

In [50]:
np.linspace(0, 10, 3)

array([ 0.,  5., 10.])

In [51]:
np.linspace(0, 10, 20)

array([ 0.        ,  0.52631579,  1.05263158,  1.57894737,  2.10526316,
        2.63157895,  3.15789474,  3.68421053,  4.21052632,  4.73684211,
        5.26315789,  5.78947368,  6.31578947,  6.84210526,  7.36842105,
        7.89473684,  8.42105263,  8.94736842,  9.47368421, 10.        ])

# The `r_` and `c_` helpers

In [52]:
np.r_[:5]

array([0, 1, 2, 3, 4])

In [53]:
np.r_[:5, 200, :5]

array([  0,   1,   2,   3,   4, 200,   0,   1,   2,   3,   4])

In [54]:
np.r_[[1,2,3], [10, 11], 5:1:-0.3]

array([ 1. ,  2. ,  3. , 10. , 11. ,  5. ,  4.7,  4.4,  4.1,  3.8,  3.5,
        3.2,  2.9,  2.6,  2.3,  2. ,  1.7,  1.4,  1.1])

In [55]:
np.r_[0:10:3]

array([0, 3, 6, 9])

In [56]:
np.r_[0:10:3j]

array([ 0.,  5., 10.])

In [57]:
(np.r_[0:10:20j] == np.linspace(0, 10, 20)).all()

True

In [61]:
np.r_[0:10:20]

array([0])

In [62]:
np.c_[:10, :100:10j]

array([[  0.        ,   0.        ],
       [  1.        ,  11.11111111],
       [  2.        ,  22.22222222],
       [  3.        ,  33.33333333],
       [  4.        ,  44.44444444],
       [  5.        ,  55.55555556],
       [  6.        ,  66.66666667],
       [  7.        ,  77.77777778],
       [  8.        ,  88.88888889],
       [  9.        , 100.        ]])

Fast generation of random values

In [63]:
np.random.random((4,4))

array([[0.80351516, 0.44480604, 0.66925971, 0.63053011],
       [0.65843811, 0.24503895, 0.27820458, 0.44202878],
       [0.579177  , 0.29078652, 0.3035797 , 0.13168765],
       [0.46878437, 0.85192625, 0.79180773, 0.75873527]])

In [64]:
np.random.normal(10, 2, (3,3))

array([[11.22273148, 10.2920636 ,  9.79183757],
       [10.70956785, 12.26631001, 11.04919632],
       [ 8.59264221,  7.60657508,  9.39315149]])

In [67]:
np.random.randn(10)

array([ 0.33602806, -0.42547855, -1.52691768, -0.32530832, -2.57883076,
       -0.21233248, -0.73379334, -1.09523666, -0.27533037, -1.38402864])

In [68]:
np.eye(5)

array([[1., 0., 0., 0., 0.],
       [0., 1., 0., 0., 0.],
       [0., 0., 1., 0., 0.],
       [0., 0., 0., 1., 0.],
       [0., 0., 0., 0., 1.]])

In [69]:
x = np.empty((2,4))
x

array([[10. , 15.5, 21. , 26.5],
       [32. , 37.5, 43. , 48.5]])

In [70]:
ys = [np.empty((2,4)) for i in range(4)]
for y in ys:
    print(y)

[[10.  15.5 21.  26.5]
 [32.  37.5 43.  48.5]]
[[10.  15.5 21.  26.5]
 [32.  37.5 43.  48.5]]
[[10.  15.5 21.  26.5]
 [32.  37.5 43.  48.5]]
[[0.00000000e+000 2.41907520e-312 1.93101617e-312 1.23075756e-312]
 [1.01855798e-312 1.08221785e-312 1.10618131e-046 2.31963749e-056]]


# Converting array types

In [71]:
xs = np.r_[:10:20j]
xs

array([ 0.        ,  0.52631579,  1.05263158,  1.57894737,  2.10526316,
        2.63157895,  3.15789474,  3.68421053,  4.21052632,  4.73684211,
        5.26315789,  5.78947368,  6.31578947,  6.84210526,  7.36842105,
        7.89473684,  8.42105263,  8.94736842,  9.47368421, 10.        ])

In [72]:
xs.astype(int)

array([ 0,  0,  1,  1,  2,  2,  3,  3,  4,  4,  5,  5,  6,  6,  7,  7,  8,
        8,  9, 10])

# Numpy indexing and slicing

In [73]:
xs = np.random.randint(1, 10, (4, 4))
xs

array([[6, 9, 2, 8],
       [1, 5, 2, 5],
       [2, 3, 1, 4],
       [8, 6, 3, 7]])

In [74]:
xs[1, 1]

5

In [75]:
xs[1]

array([1, 5, 2, 5])

In [76]:
xs[:,1]

array([9, 5, 3, 6])

In [77]:
xs[1:3,1:3]

array([[5, 2],
       [3, 1]])

In [78]:
xs[[1,2], 2]

array([2, 1])

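Passing lists for all indexes works a little differently: the lists are paired up element by element, so `xs[[1,2], [3,2]]` picks out `xs[1, 3]` and `xs[2, 2]` rather than a 2x2 sub-grid.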

In [79]:
xs[[1,2], [3,2]]

array([5, 1])

In [80]:
xs

array([[6, 9, 2, 8],
       [1, 5, 2, 5],
       [2, 3, 1, 4],
       [8, 6, 3, 7]])

In [81]:
xs[1, 3], xs[2, 2]

(5, 1)

In [82]:
xs.reshape(16)

array([6, 9, 2, 8, 1, 5, 2, 5, 2, 3, 1, 4, 8, 6, 3, 7])

In [83]:
xs.ravel()

array([6, 9, 2, 8, 1, 5, 2, 5, 2, 3, 1, 4, 8, 6, 3, 7])

In [84]:
np.r_[:9].reshape((3,3))

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

Sometimes you need to take a 1-D array and make a column vector out of it:

In [85]:
xs = np.r_[:10]
xs

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [86]:
xs[:, None]

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

In [87]:
xs[:, np.newaxis]

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

In [88]:
print(np.newaxis)

None


In [90]:
np.c_[xs]

array([[0],
       [1],
       [2],
       [3],
       [4],
       [5],
       [6],
       [7],
       [8],
       [9]])

# Filtering

In [91]:
xs = np.random.randint(0, 5, 10)

In [92]:
xs

array([1, 0, 3, 1, 1, 0, 3, 2, 3, 3])

In [93]:
xs.nonzero()

(array([0, 2, 3, 4, 6, 7, 8, 9]),)

In [94]:
xs[xs.nonzero()]

array([1, 3, 1, 1, 3, 2, 3, 3])

In [95]:
xs < 3

array([ True,  True, False,  True,  True,  True, False,  True, False,
       False])

In [96]:
xs[xs < 3]

array([1, 0, 1, 1, 0, 2])

In [100]:
xs = np.random.randint(0, 5, (3, 3))
xs

array([[4, 4, 0],
       [0, 2, 4],
       [1, 2, 0]])

In [101]:
xs.nonzero()

(array([0, 0, 1, 1, 2, 2]), array([0, 1, 1, 2, 0, 1]))

In [102]:
np.array(xs.nonzero())

array([[0, 0, 1, 1, 2, 2],
       [0, 1, 1, 2, 0, 1]])

In [103]:
xs[xs.nonzero()]

array([4, 4, 2, 4, 1, 2])

Maybe this looks a little cleaner?

In [104]:
np.transpose(xs.nonzero())

array([[0, 0],
       [0, 1],
       [1, 1],
       [1, 2],
       [2, 0],
       [2, 1]])

In [105]:
xs[xs.nonzero()] = 100
xs

array([[100, 100,   0],
       [  0, 100, 100],
       [100, 100,   0]])

In [106]:
xs = np.random.random((4,4))
xs

array([[0.48094146, 0.00790153, 0.3666162 , 0.71199577],
       [0.73064037, 0.36384548, 0.93603056, 0.5901293 ],
       [0.21012187, 0.68841398, 0.39712738, 0.74640262],
       [0.18964181, 0.40318946, 0.97770189, 0.09312152]])

In [108]:
xs[xs > 0.7] = 0.7
xs

array([[0.48094146, 0.00790153, 0.3666162 , 0.7       ],
       [0.7       , 0.36384548, 0.7       , 0.5901293 ],
       [0.21012187, 0.68841398, 0.39712738, 0.7       ],
       [0.18964181, 0.40318946, 0.7       , 0.09312152]])

# Array views

In [109]:
xs = np.r_[:16].reshape((4,4))
xs

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [110]:
xs_view = xs[1:3,1:3]
xs_view

array([[ 5,  6],
       [ 9, 10]])

In [111]:
xs_view[0,0] = 100
xs_view

array([[100,   6],
       [  9,  10]])

In [112]:
xs

array([[  0,   1,   2,   3],
       [  4, 100,   6,   7],
       [  8,   9,  10,  11],
       [ 12,  13,  14,  15]])

In [113]:
xs_view_copy = xs_view.copy()

In [114]:
xs_view_copy[0, 0] = 200
xs_view_copy

array([[200,   6],
       [  9,  10]])

In [115]:
xs

array([[  0,   1,   2,   3],
       [  4, 100,   6,   7],
       [  8,   9,  10,  11],
       [ 12,  13,  14,  15]])

In [116]:
xs_view

array([[100,   6],
       [  9,  10]])

# Numpy universal functions (ufuncs)

Most `np.ndarray` methods also exist as functions in the numpy namespace.

They typically work equally well on scalars, numpy arrays, and Python sequences.

In [117]:
np.multiply(2, 3)  # scalar / scalar

6

In [118]:
np.multiply(np.r_[:10], 20) # array / scalar

array([  0,  20,  40,  60,  80, 100, 120, 140, 160, 180])

In [119]:
np.multiply(np.r_[:10], [4] * 10)  # array / list

array([ 0,  4,  8, 12, 16, 20, 24, 28, 32, 36])

In [120]:
np.divide(1, 0)

  """Entry point for launching an IPython kernel.


inf

In [121]:
np.divide([1], [0])

  """Entry point for launching an IPython kernel.


array([inf])

In [122]:
np.divide(1, 0) - np.divide(1, 0)

  """Entry point for launching an IPython kernel.
  """Entry point for launching an IPython kernel.


nan

In [123]:
1 / np.zeros(10)

  """Entry point for launching an IPython kernel.


array([inf, inf, inf, inf, inf, inf, inf, inf, inf, inf])

In [124]:
1 // np.r_[0]

  """Entry point for launching an IPython kernel.


array([0])

In [125]:
2 ** np.r_[:9].reshape((3,3))

array([[  1,   2,   4],
       [  8,  16,  32],
       [ 64, 128, 256]])

See https://numpy.org/doc/stable/reference/ufuncs.html for the full list of ufuncs.

Generally, operators delegate to ufuncs if at least one side of the operation is a numpy type.

# Aggregation

We can apply ufuncs as reduction operators:

In [126]:
xs

array([[  0,   1,   2,   3],
       [  4, 100,   6,   7],
       [  8,   9,  10,  11],
       [ 12,  13,  14,  15]])

In [127]:
np.add.reduce(xs)

array([ 24, 123,  32,  36])

In [128]:
np.add.reduce(xs, axis=1)

array([  6, 117,  38,  54])

In [129]:
np.multiply.reduce(xs)

array([    0, 11700,  1680,  3465])

In [130]:
np.multiply.reduce(xs, axis=1)

array([    0, 16800,  7920, 32760])

In [131]:
np.mean(xs)

13.4375

In [132]:
np.mean(xs, axis=0)

array([ 6.  , 30.75,  8.  ,  9.  ])

In [133]:
np.mean(xs, axis=1)

array([ 1.5 , 29.25,  9.5 , 13.5 ])

# Build your own ufunc

Although this does _not_ give you "compiled C" performance, you can get numpy's broadcasting rules for your own functions using `np.vectorize`.

In [134]:
def saturating_adder(maxval):
    """Build an element-wise adder whose results are capped at ``maxval``.

    Args:
        maxval: Upper bound applied to every individual sum.

    Returns:
        An ``np.vectorize`` wrapper around a two-argument function that
        yields ``x + y``, or ``maxval`` when ``maxval`` is the smaller value.
    """
    def add(x, y):
        # np.vectorize applies this scalar function to the inputs element by element.
        total = x + y
        return min(total, maxval)

    vectorized_add = np.vectorize(add)
    return vectorized_add
    

In [135]:
myadd = saturating_adder(10)

In [136]:
myadd(2, 9)

array(10)

In [137]:
myadd([2,3,4,5], 6)

array([ 8,  9, 10, 10])

In [138]:
myadd([2,3,4,5], np.r_[5:9])

array([ 7,  9, 10, 10])

# Lab

Open the [Numpy Lab][numpy-lab]

[numpy-lab]: ./numpy-lab.ipynb