# Python Doesn't Have Good Numeric Support
* Python integers are actually objects with header and typing information
* access to Python integers requires a level of indirection
* In C, integers are directly accessible in memory without indirection
<img src="images/python-01.png" width=400 height=400>

## The Problem is Even Worse for Python Lists 
* Python lists are immensely flexible
  * no fixed size
  * OK to have heterogeneous data
* ...but as a result their elements are not likely to be contiguous in memory
* and even if they are, there is still a lot of indirection required
* ergo, they aren't good for fast number crunching
<img src="images/python-02.png" width=400 height=400>

## The Solution is to Use <a href="http://www.numpy.org">Numpy</a>
* written in C
* allows for vectorized operations

In [1]:
import numpy as np
np.random.seed(0)

# let's create a simple Python function which
# computes 1/x for a list of values
def compute_reciprocals(values):
    """Return an array holding 1/x for every x in ``values``.

    The work is done one element at a time in a plain Python loop; this is
    the slow, non-vectorized baseline that the timing cells compare against.

    Parameters
    ----------
    values : sequence of numbers
        Anything supporting ``len()`` and iteration (list, 1-D array, ...).

    Returns
    -------
    numpy.ndarray
        Array of length ``len(values)`` created with ``np.empty`` (so it has
        NumPy's default dtype), where entry ``k`` is ``1.0 / values[k]``.
    """
    # allocate the result buffer up front; its contents are uninitialized
    # until the loop below overwrites every slot
    reciprocals = np.empty(len(values))
    for position, item in enumerate(values):
        reciprocals[position] = 1.0 / item

    return reciprocals

values = np.random.randint(1, 10, size=5)
compute_reciprocals(values)

array([0.16666667, 1.        , 0.25      , 0.25      , 0.125     ])

In [2]:
# Doing something like the above is super slow in Python
big_array = np.random.randint(1, 100, size=1_000_000)
%timeit compute_reciprocals(big_array)

1.78 s ± 34.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [None]:
import numpy as np

In [3]:
x = 2.345
x - 2.345

0.0

In [4]:
x - 2.345 == 0.0

True

## __np.trunc()__

* the nearest integer __`i`__ in the direction of zero from __`x`__ (i.e., the fractional part of __`x`__ is discarded)

In [5]:
np.trunc(x)

2.0

## __np.floor()__

* the largest integer __`i`__, such that __`i <= x`__

In [7]:
np.floor(x)

2.0

In [8]:
np.floor(2.01)

2.0

In [9]:
np.floor(2.00)

2.0

## __np.ceil()__

* the smallest integer __`i`__, such that __`i >= x`__

In [10]:
np.ceil(x)

3.0

In [11]:
np.ceil(2.01)

3.0

In [12]:
np.ceil(2.0)

2.0

In [13]:
np.ceil(x) - 1

2.0

In [14]:
np.ceil(2.01) - 1

2.0

## A numpy Array
* data is contiguous
<img src="images/python-03.png" width=300 height=300>

In [15]:
# numpy will intuit the data type
a = np.array([1, 4, 2, 5, 3])
a, a.dtype

(array([1, 4, 2, 5, 3]), dtype('int64'))

In [16]:
a = np.array([3.14, 4, 2, 3])
a, a.dtype

(array([3.14, 4.  , 2.  , 3.  ]), dtype('float64'))

In [17]:
# ...or you can be explicit
a = np.array([1, 2, 3, 4], dtype='float32')
a

array([1., 2., 3., 4.], dtype=float32)

In [18]:
np.array([range(i, i + 3) for i in [2, 4, 6]])

array([[2, 3, 4],
       [4, 5, 6],
       [6, 7, 8]])

In [19]:
np.zeros(10, dtype=int)

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [20]:
np.ones((3, 5), dtype=float)

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [21]:
np.full((3, 5), np.pi)

array([[3.14159265, 3.14159265, 3.14159265, 3.14159265, 3.14159265],
       [3.14159265, 3.14159265, 3.14159265, 3.14159265, 3.14159265],
       [3.14159265, 3.14159265, 3.14159265, 3.14159265, 3.14159265]])

In [22]:
np.arange(0, 20, 2)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [23]:
np.linspace(0, 1, 5)

array([0.  , 0.25, 0.5 , 0.75, 1.  ])

In [24]:
np.random.random((3, 3))

array([[0.25241011, 0.97572458, 0.1197632 ],
       [0.20856888, 0.23983126, 0.39034023],
       [0.61318864, 0.8603706 , 0.34645385]])

In [25]:
np.random.normal(0, 1, (3, 3))

array([[-0.80846432, -1.55328005,  1.09581535],
       [ 1.0034082 , -0.96307811,  0.38494418],
       [-2.10187333,  0.43009583, -0.66377698]])

In [26]:
np.random.randint(0, 10, (3, 3))

array([[1, 8, 6],
       [9, 9, 7],
       [6, 0, 9]])

In [27]:
np.eye(3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

## Converting array types

In [28]:
x = np.linspace(0, 10, 50)
x

array([ 0.        ,  0.20408163,  0.40816327,  0.6122449 ,  0.81632653,
        1.02040816,  1.2244898 ,  1.42857143,  1.63265306,  1.83673469,
        2.04081633,  2.24489796,  2.44897959,  2.65306122,  2.85714286,
        3.06122449,  3.26530612,  3.46938776,  3.67346939,  3.87755102,
        4.08163265,  4.28571429,  4.48979592,  4.69387755,  4.89795918,
        5.10204082,  5.30612245,  5.51020408,  5.71428571,  5.91836735,
        6.12244898,  6.32653061,  6.53061224,  6.73469388,  6.93877551,
        7.14285714,  7.34693878,  7.55102041,  7.75510204,  7.95918367,
        8.16326531,  8.36734694,  8.57142857,  8.7755102 ,  8.97959184,
        9.18367347,  9.3877551 ,  9.59183673,  9.79591837, 10.        ])

In [29]:
x.astype(int)

array([ 0,  0,  0,  0,  0,  1,  1,  1,  1,  1,  2,  2,  2,  2,  2,  3,  3,
        3,  3,  3,  4,  4,  4,  4,  4,  5,  5,  5,  5,  5,  6,  6,  6,  6,
        6,  7,  7,  7,  7,  7,  8,  8,  8,  8,  8,  9,  9,  9,  9, 10])

## Multi-dimensional Arrays

In [30]:
x2 = np.random.randint(10, size=(3, 4))
x2

array([[1, 1, 3, 9],
       [2, 6, 1, 1],
       [7, 8, 9, 5]])

## True "matrix-style" indexing

In [31]:
x2[0, 0]

1

In [32]:
x2[2, 0]

7

In [33]:
x2[2, -1]

5

In [34]:
x2[0, 0] = 12
x2

array([[12,  1,  3,  9],
       [ 2,  6,  1,  1],
       [ 7,  8,  9,  5]])

In [35]:
np.arange(0, 9).reshape(3, 3)

array([[0, 1, 2],
       [3, 4, 5],
       [6, 7, 8]])

## Array Slicing

In [36]:
x = np.arange(10)
x[:5]

array([0, 1, 2, 3, 4])

In [37]:
x[5:]

array([5, 6, 7, 8, 9])

In [38]:
x[4:7]

array([4, 5, 6])

In [39]:
x[::2]

array([0, 2, 4, 6, 8])

In [40]:
x[1::2]

array([1, 3, 5, 7, 9])

In [41]:
x[::-1]

array([9, 8, 7, 6, 5, 4, 3, 2, 1, 0])

In [42]:
x[5::-2]

array([5, 3, 1])

## Filtering 1-dimensional data

In [43]:
x = np.array([ 1, 0, 5, 2, 1, 0, 8, 0, 0 ])

In [44]:
np.nonzero(x)

(array([0, 2, 3, 4, 6]),)

In [45]:
x[np.nonzero(x)]

array([1, 5, 2, 1, 8])

In [46]:
x[np.nonzero(x < 3)]

array([1, 0, 2, 1, 0, 0, 0])

## Filtering 2-dimensional data

In [47]:
x = np.array([[1, 0, 0], [0, 2, 0], [1, 1, 0]])
x

array([[1, 0, 0],
       [0, 2, 0],
       [1, 1, 0]])

In [48]:
# produces two arrays: one with the row indices, one with the column indices
np.nonzero(x)

(array([0, 1, 2, 2]), array([0, 1, 0, 1]))

In [49]:
x[np.nonzero(x)]

array([1, 2, 1, 1])

In [50]:
np.transpose(np.nonzero(x))

array([[0, 0],
       [1, 1],
       [2, 0],
       [2, 1]])

## Multi-dimensional subarrays

In [51]:
x2

array([[12,  1,  3,  9],
       [ 2,  6,  1,  1],
       [ 7,  8,  9,  5]])

In [52]:
x2[:2, :3]

array([[12,  1,  3],
       [ 2,  6,  1]])

In [53]:
x2[:3, ::2]

array([[12,  3],
       [ 2,  1],
       [ 7,  9]])

In [54]:
x2[::-1, ::-1]

array([[ 5,  9,  8,  7],
       [ 1,  1,  6,  2],
       [ 9,  3,  1, 12]])

## Subarray Views

In [55]:
x2, id(x2)

(array([[12,  1,  3,  9],
        [ 2,  6,  1,  1],
        [ 7,  8,  9,  5]]), 4578745040)

In [56]:
x2_sub = x2[:2, :2]
x2_sub, id(x2_sub)

(array([[12,  1],
        [ 2,  6]]), 4578775728)

In [57]:
x2_sub[0, 0] = 99
x2_sub

array([[99,  1],
       [ 2,  6]])

In [58]:
x2 # changes x2 as well, since the subarray has references to the original

array([[99,  1,  3,  9],
       [ 2,  6,  1,  1],
       [ 7,  8,  9,  5]])

## Vectorized Operations

In [59]:
big_array = np.random.randint(1, 100, size=1000000)

In [60]:
%timeit 1.0 / big_array

2.37 ms ± 18 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [61]:
big_array = np.random.rand(1_000_000)
%timeit sum(big_array) # Python sum method (serial)
%timeit np.sum(big_array) # numpy sum method (vectorized)

68 ms ± 963 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)
340 µs ± 30.7 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


## Universal Functions (ufuncs)
* operates on ndarrays in an element-by-element fashion
* _vectorized_ wrapper for a function that takes a fixed number of specific inputs and produces a fixed number of specific outputs

In [62]:
x = np.arange(9).reshape((3, 3))
2 ** x

array([[  1,   2,   4],
       [  8,  16,  32],
       [ 64, 128, 256]])

In [63]:
x = np.arange(4)
-(0.5 * x + 1) ** 2

array([-1.  , -2.25, -4.  , -6.25])

| Operator | ufunc           | Description                         |
|----------|-----------------|-------------------------------------|
|   +      | np.add          | Addition (e.g., 1 + 1 = 2)          |
|   -      | np.subtract     | Subtraction (e.g., 3 - 2 = 1)       |
|   -      | np.negative     | Unary negation (e.g., -2)           |
|   *      | np.multiply     | Multiplication (e.g., 2 * 3 = 6)    |
|   /      | np.divide       | Division (e.g., 3 / 2 = 1.5)        |
|   //     | np.floor_divide | Floor division (e.g., 3 // 2 = 1)   |
|   **     | np.power        | Exponentiation (e.g., 2 ** 3 = 8)   |
|   %      | np.mod          | Modulus/remainder (e.g., 9 % 4 = 1) |

## Trigonometric ufuncs

In [64]:
theta = np.linspace(0, np.pi, 3)
theta

array([0.        , 1.57079633, 3.14159265])

In [65]:
np.sin(theta)

array([0.0000000e+00, 1.0000000e+00, 1.2246468e-16])

In [66]:
np.cos(theta)

array([ 1.000000e+00,  6.123234e-17, -1.000000e+00])

In [67]:
np.tan(theta)

array([ 0.00000000e+00,  1.63312394e+16, -1.22464680e-16])

In [68]:
x = [-1, 0, 1]
np.arcsin(x)

array([-1.57079633,  0.        ,  1.57079633])

In [69]:
np.arccos(x)

array([3.14159265, 1.57079633, 0.        ])

In [70]:
np.arctan(x)

array([-0.78539816,  0.        ,  0.78539816])

## Exponent and Logarithm ufuncs

In [71]:
x = [1, 2, 3]
np.exp(x)

array([ 2.71828183,  7.3890561 , 20.08553692])

In [72]:
np.exp2(x)

array([2., 4., 8.])

In [73]:
np.power(3, x)

array([ 3,  9, 27])

In [74]:
np.log([1, np.e, 3])

array([0.        , 1.        , 1.09861229])

In [75]:
np.log2([1, 256, 65536])

array([ 0.,  8., 16.])

In [76]:
np.log10([1_000, 1_000_000, 10 ** 10])

array([ 3.,  6., 10.])

## Aggregate ufuncs

In [77]:
x = np.arange(1, 6)
np.add.reduce(x) # reduce to scalar via addition

15

In [78]:
np.multiply.reduce(x)

120

In [79]:
np.add.accumulate(x)

array([ 1,  3,  6, 10, 15])

In [80]:
np.multiply.accumulate(x)

array([  1,   2,   6,  24, 120])