A.1 ndarray Object Internals

In [1]:
import numpy as np

np.ones((10, 5)).shape

(10, 5)

In [2]:
np.ones((3, 4, 5), dtype=np.float64).strides

(160, 40, 8)

In [4]:
# NumPy dtype Hierarchy

ints = np.ones(10, dtype=np.uint16)
floats = np.ones(10, dtype=np.float32)
np.issubdtype(ints.dtype, np.integer)



True

In [5]:
np.issubdtype(floats.dtype, np.floating)

True

In [6]:
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [7]:
np.issubdtype(ints.dtype, np.number)

True

A.2 Advanced Array Manipulation

In [None]:
# Reshaping Arrays

arr = np.arange(8)
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [9]:
arr.reshape((4, 2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [10]:
arr.reshape((4, 2)).reshape((2, 4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [11]:
arr = np.arange(15)
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [13]:
other_arr = np.ones((3, 5))
other_arr.shape

(3, 5)

In [14]:
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [15]:
arr = np.arange(15).reshape((5, 3))
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [16]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [17]:
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [18]:
# C Versus Fortran Order

arr = np.arange(12).reshape((3, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [19]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [20]:
arr.ravel('F')

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

In [22]:
# Concatenating and Splitting Arrays

arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])
np.concatenate([arr1, arr2], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [23]:
np.concatenate([arr1, arr2], axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [24]:
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [25]:
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [26]:
arr = np.random.randn(5, 2)
arr

array([[-0.17436422,  0.38463187],
       [ 0.77542162, -0.79090572],
       [ 0.40936218, -0.47497652],
       [-1.72991043,  0.0289211 ],
       [ 0.25482025,  1.05906761]])

In [27]:
first, second, third = np.split(arr, [1, 3])
first

array([[-0.17436422,  0.38463187]])

In [28]:
second

array([[ 0.77542162, -0.79090572],
       [ 0.40936218, -0.47497652]])

In [29]:
third

array([[-1.72991043,  0.0289211 ],
       [ 0.25482025,  1.05906761]])

In [30]:
arr = np.arange(6)
arr1 = arr.reshape((3, 2))
arr2 = np.random.randn(3, 2)
np.r_[arr1, arr2]

array([[ 0.        ,  1.        ],
       [ 2.        ,  3.        ],
       [ 4.        ,  5.        ],
       [-1.5938487 , -0.27839344],
       [ 0.23748079, -0.26041985],
       [-0.88342441, -1.65434689]])

In [31]:
np.c_[np.r_[arr1, arr2], arr]

array([[ 0.        ,  1.        ,  0.        ],
       [ 2.        ,  3.        ,  1.        ],
       [ 4.        ,  5.        ,  2.        ],
       [-1.5938487 , -0.27839344,  3.        ],
       [ 0.23748079, -0.26041985,  4.        ],
       [-0.88342441, -1.65434689,  5.        ]])

In [32]:
np.c_[1:6, -10:-5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

In [33]:
# Repeating Elements: tile and repeat

arr = np.arange(3)
arr

array([0, 1, 2])

In [34]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [35]:
arr.repeat([2, 3, 4])


array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [37]:
arr = np.random.randn(2, 2)
arr

array([[ 0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736]])

In [38]:
arr.repeat(2, axis=0)

array([[ 0.81930283, -1.50233923],
       [ 0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736],
       [ 0.13100958,  1.38094736]])

In [39]:
arr.repeat([2, 3], axis=0)

array([[ 0.81930283, -1.50233923],
       [ 0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736],
       [ 0.13100958,  1.38094736],
       [ 0.13100958,  1.38094736]])

In [40]:
arr.repeat([2, 3], axis=1)

array([[ 0.81930283,  0.81930283, -1.50233923, -1.50233923, -1.50233923],
       [ 0.13100958,  0.13100958,  1.38094736,  1.38094736,  1.38094736]])

In [41]:
arr

array([[ 0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736]])

In [42]:
np.tile(arr, (2, 1))

array([[ 0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736],
       [ 0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736]])

In [43]:
np.tile(arr, (3, 2))

array([[ 0.81930283, -1.50233923,  0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736,  0.13100958,  1.38094736],
       [ 0.81930283, -1.50233923,  0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736,  0.13100958,  1.38094736],
       [ 0.81930283, -1.50233923,  0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736,  0.13100958,  1.38094736]])

In [44]:
# Fancy Indexing Equivalents: take and put

arr = np.arange(10) * 100
inds = [7, 1, 2, 6]
arr[inds]

array([700, 100, 200, 600])

In [45]:
arr.take(inds)

array([700, 100, 200, 600])

In [47]:
arr.put(inds, 42)
arr

array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [48]:
inds = [2, 0, 2, 1]
arr = np.random.randn(2, 4)
arr

array([[-0.6050039 , -0.63999527, -0.25797005, -0.98507493],
       [ 0.62629298,  0.39179615, -1.42568854,  0.07293799]])

In [49]:
arr.take(inds, axis=1)

array([[-0.25797005, -0.6050039 , -0.25797005, -0.63999527],
       [-1.42568854,  0.62629298, -1.42568854,  0.39179615]])

A.3 Broadcasting

In [2]:
import numpy as np


arr = np.arange(5)
arr

array([0, 1, 2, 3, 4])

In [3]:
arr * 4

array([ 0,  4,  8, 12, 16])

In [4]:
arr = np.random.randn(4, 3)
arr.mean(0)

array([-0.05727587, -0.17194382, -0.00195156])

In [6]:
demeaned = arr - arr.mean(0)
demeaned

array([[-1.19041479,  2.05989947,  0.20173754],
       [ 0.07391815, -0.83229166, -0.6603563 ],
       [ 0.4827489 , -0.43622634,  1.64967486],
       [ 0.63374775, -0.79138146, -1.1910561 ]])

In [7]:
demeaned.mean(0)

array([ 0.00000000e+00, -2.77555756e-17,  0.00000000e+00])

In [8]:
arr

array([[-1.24769067,  1.88795565,  0.19978598],
       [ 0.01664227, -1.00423549, -0.66230787],
       [ 0.42547303, -0.60817017,  1.6477233 ],
       [ 0.57647187, -0.96332529, -1.19300767]])

In [9]:
row_means = arr.mean(1)
row_means.shape

(4,)

In [10]:
row_means.reshape(4, 1)

array([[ 0.28001699],
       [-0.54996703],
       [ 0.48834205],
       [-0.52662036]])

In [11]:
demeand = arr - row_means.reshape((4, 1))
demeand.mean(1)

array([-1.85037171e-17, -3.70074342e-17,  7.40148683e-17,  1.11022302e-16])

In [12]:
# Broadcasting Over Other Axes

arr - arr.mean(1)

ValueError: operands could not be broadcast together with shapes (4,3) (4,) 

In [13]:
arr - arr.mean(1).reshape((4,1))

array([[-1.52770765,  1.60793866, -0.08023101],
       [ 0.5666093 , -0.45426846, -0.11234084],
       [-0.06286903, -1.09651222,  1.15938125],
       [ 1.10309223, -0.43670493, -0.66638731]])

In [14]:
arr =np.zeros((4,4))
arr_3d = arr[:, np.newaxis, :]
arr_3d.shape

(4, 1, 4)

In [15]:
arr_1d = np.random.normal(size=3)
arr_1d[:, np.newaxis]

array([[1.00292464],
       [0.09190387],
       [0.66471496]])

In [16]:
arr_1d[np.newaxis, :]

array([[1.00292464, 0.09190387, 0.66471496]])

In [17]:
arr = np.random.randn(3, 4, 5)
depth_means = arr.mean(2)
depth_means

array([[-0.01759972,  0.24255211,  0.14551152,  0.69861989],
       [ 0.00762028,  0.3882635 , -0.5668809 , -1.00681192],
       [ 0.27071336,  0.91649147, -0.02275329,  0.65269149]])

In [18]:
depth_means.shape

(3, 4)

In [19]:
demeand = arr - depth_means[:, :, np.newaxis]
demeand.mean(2)

array([[-8.32667268e-18,  0.00000000e+00,  0.00000000e+00,
         8.88178420e-17],
       [ 2.22044605e-17,  3.88578059e-17,  6.66133815e-17,
         0.00000000e+00],
       [ 8.88178420e-17, -2.22044605e-17,  4.44089210e-17,
        -2.22044605e-17]])

In [None]:
def demean_axis(arr, axis=0):
    """Subtract the mean taken along ``axis`` from ``arr`` via broadcasting.

    Parameters
    ----------
    arr : array with ``mean`` and ``ndim`` (e.g. ``numpy.ndarray``)
        Input data.
    axis : int, default 0
        Axis along which the mean is computed and removed. Negative values
        count from the last axis.

    Returns
    -------
    Array with the same shape as ``arr`` whose mean along ``axis`` is zero
    up to floating-point rounding.
    """
    means = arr.mean(axis)
    # Re-insert the reduced axis as a length-1 dimension so ``means``
    # broadcasts against ``arr``: full slices everywhere, newaxis at ``axis``.
    indexer = [slice(None)] * arr.ndim
    indexer[axis] = np.newaxis
    # A multidimensional index must be a tuple; current NumPy rejects a plain
    # list that contains slices or None.
    return arr - means[tuple(indexer)]


In [20]:
# Setting Array Values by Broadcasting


arr = np.zeros((4, 3))
arr[:] = 5
arr

array([[5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

In [21]:
col = np.array([1.28, -0.42, 0.44, 1.6])
arr[:] = col[:, np.newaxis]
arr

array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

In [22]:
arr[:2] = [[-1.37], [0.509]]
arr

array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])

A.4 Advanced ufunc Usage

In [23]:
# ufunc Instance Methods

arr = np.arange(10)
np.add.reduce(arr)

np.int64(45)

In [24]:
arr.sum()

np.int64(45)

In [25]:
np.random.seed(123456)
arr = np.random.randn(5, 5)
arr[::2].sort(1)
arr[:, :-1] < arr[:, 1:]

array([[ True,  True,  True,  True],
       [ True, False,  True, False],
       [ True,  True,  True,  True],
       [False,  True, False, False],
       [ True,  True,  True,  True]])

In [26]:
np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)

array([ True, False,  True, False,  True])

In [27]:
arr = np.arange(15).reshape((3, 5))
np.add.accumulate(arr, axis=1)

array([[ 0,  1,  3,  6, 10],
       [ 5, 11, 18, 26, 35],
       [10, 21, 33, 46, 60]])

In [28]:
arr = np.arange(3).repeat([1, 2, 2])
arr

array([0, 1, 1, 2, 2])

In [29]:
np.multiply.outer(arr, np.arange(5))

array([[0, 0, 0, 0, 0],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 2, 4, 6, 8],
       [0, 2, 4, 6, 8]])

In [30]:
x, y = np.random.randn(3, 4), np.random.randn(5)
result = np.subtract.outer(x, y)
result.shape

(3, 4, 5)

In [31]:
arr = np.arange(10)
np.add.reduceat(arr, [0, 5, 8])

array([10, 18, 17])

In [32]:
arr = np.multiply.outer(np.arange(4), np.arange(5))
arr

array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12]])

In [33]:
np.add.reduceat(arr, [0, 2, 4], axis=1)

array([[ 0,  0,  0],
       [ 1,  5,  4],
       [ 2, 10,  8],
       [ 3, 15, 12]])

In [34]:
# Writing New ufuncs in Python

def add_elements(x, y):
    """Return the result of ``x + y`` for a single pair of operands."""
    total = x + y
    return total

add_them = np.frompyfunc(add_elements, 2, 1)
add_them(np.arange(8), np.arange(8))

array([0, 2, 4, 6, 8, 10, 12, 14], dtype=object)

In [36]:
add_them = np.vectorize(add_elements, otypes=[np.float64])
add_them(np.arange(8), np.arange(8))

array([ 0.,  2.,  4.,  6.,  8., 10., 12., 14.])

In [37]:
arr = np.random.randn(10000)
%timeit add_them(arr, arr)

564 μs ± 6.27 μs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [39]:
%timeit np.add(arr, arr)

1.2 μs ± 6.8 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


A.5 Structured and Record Arrays

In [2]:
import numpy as np


dtype = [('x', np.float64), ('y', np.int32)]
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)

sarr

array([(1.5       ,  6), (3.14159265, -2)],
      dtype=[('x', '<f8'), ('y', '<i4')])

In [3]:
sarr[0]

np.void((1.5, 6), dtype=[('x', '<f8'), ('y', '<i4')])

In [4]:
sarr[0]['y']

np.int32(6)

In [5]:
sarr['x']

array([1.5       , 3.14159265])

In [6]:
# Nested dtypes and Multidimensional Fields

dtype = [('x', np.float64, 3), ('y', np.int32)]
arr = np.zeros(4, dtype=dtype)
arr


array([([0., 0., 0.], 0), ([0., 0., 0.], 0), ([0., 0., 0.], 0),
       ([0., 0., 0.], 0)], dtype=[('x', '<f8', (3,)), ('y', '<i4')])

In [7]:
arr[0]['x']

array([0., 0., 0.])

In [8]:
arr['x']

array([[0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.],
       [0., 0., 0.]])

In [9]:
dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]
data = np.array([((1,2), 5), ((3, 4), 6)], dtype=dtype)
data['x']

array([(1., 2.), (3., 4.)], dtype=[('a', '<f8'), ('b', '<f4')])

In [10]:
data['y']

array([5, 6], dtype=int32)

In [11]:
data['x']['a']

array([1., 3.])

A.6 More About Sorting

In [12]:
arr = np.random.randn(6)
arr.sort()
arr

array([-1.99968033, -1.93193439, -0.52483877,  0.17967257,  1.029372  ,
        2.02861768])

In [17]:
arr = np.random.randn(3, 5)
arr

array([[ 0.58855389,  1.31016228,  0.22678905,  1.56731075,  0.34869487],
       [-0.66488114, -0.60730657,  0.18307019, -2.24410054, -0.11922446],
       [ 0.22964507, -0.9020521 , -0.06278652,  1.73692143, -0.48940769]])

In [18]:
arr[:, 0].sort()
arr

array([[-0.66488114,  1.31016228,  0.22678905,  1.56731075,  0.34869487],
       [ 0.22964507, -0.60730657,  0.18307019, -2.24410054, -0.11922446],
       [ 0.58855389, -0.9020521 , -0.06278652,  1.73692143, -0.48940769]])

In [19]:
arr = np.random.randn(5)
arr

array([-1.12242105, -1.03284289,  0.33923036,  0.04070349,  0.96752961])

In [20]:
np.sort(arr)

array([-1.12242105, -1.03284289,  0.04070349,  0.33923036,  0.96752961])

In [21]:
arr

array([-1.12242105, -1.03284289,  0.33923036,  0.04070349,  0.96752961])

In [22]:
arr = np.random.randn(3, 5)
arr

array([[ 0.32599678,  1.02757344, -1.69950147, -0.81565548, -0.4555942 ],
       [ 0.79365116,  0.63272975,  2.54486569,  0.78163406,  0.03849848],
       [-0.18210666,  0.83522375, -0.15948588,  0.04807196, -0.25951563]])

In [23]:
arr.sort(axis=1)
arr

array([[-1.69950147, -0.81565548, -0.4555942 ,  0.32599678,  1.02757344],
       [ 0.03849848,  0.63272975,  0.78163406,  0.79365116,  2.54486569],
       [-0.25951563, -0.18210666, -0.15948588,  0.04807196,  0.83522375]])

In [24]:
arr[:, ::-1]

array([[ 1.02757344,  0.32599678, -0.4555942 , -0.81565548, -1.69950147],
       [ 2.54486569,  0.79365116,  0.78163406,  0.63272975,  0.03849848],
       [ 0.83522375,  0.04807196, -0.15948588, -0.18210666, -0.25951563]])

In [25]:
# Indirect Sorts: argsort and lexsort

values = np.array([5, 0, 1, 3, 2])
indexer = values.argsort()
indexer

array([1, 2, 4, 3, 0])

In [26]:
values[indexer]

array([0, 1, 2, 3, 5])

In [27]:
arr = np.random.randn(3, 5)
arr[0] = values
arr

array([[ 5.        ,  0.        ,  1.        ,  3.        ,  2.        ],
       [ 0.04447453,  0.73160455,  0.07287318, -0.66128578, -1.59223783],
       [-0.00581481,  0.73834596,  1.44703046, -0.02394432, -0.61888691]])

In [28]:
arr[:, arr[0].argsort()]

array([[ 0.        ,  1.        ,  2.        ,  3.        ,  5.        ],
       [ 0.73160455,  0.07287318, -1.59223783, -0.66128578,  0.04447453],
       [ 0.73834596,  1.44703046, -0.61888691, -0.02394432, -0.00581481]])

In [30]:
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jonses', 'Walters'])
sorter = np.lexsort((first_name, last_name))
sorter
zip(last_name[sorter], first_name[sorter])

<zip at 0x10d080100>

In [31]:
# Alternative Sort Algorithms

values = np.array(['2:first', '2:second', '1:first', '1:second', '1:third'])
key = np.array([2, 2, 1, 1, 1])
indexer = key.argsort(kind='mergesort')
indexer

array([2, 3, 4, 0, 1])

In [32]:
values.take(indexer)

array(['1:first', '1:second', '1:third', '2:first', '2:second'],
      dtype='<U8')

In [33]:
# Partially Sorting Arrays

np.random.seed(12345)
arr = np.random.randn(20)
arr

array([-0.20470766,  0.47894334, -0.51943872, -0.5557303 ,  1.96578057,
        1.39340583,  0.09290788,  0.28174615,  0.76902257,  1.24643474,
        1.00718936, -1.29622111,  0.27499163,  0.22891288,  1.35291684,
        0.88642934, -2.00163731, -0.37184254,  1.66902531, -0.43856974])

In [34]:
np.partition(arr, 3)

array([-2.00163731, -1.29622111, -0.5557303 , -0.51943872, -0.37184254,
       -0.43856974, -0.20470766,  0.28174615,  0.76902257,  0.47894334,
        1.00718936,  0.09290788,  0.27499163,  0.22891288,  1.35291684,
        0.88642934,  1.39340583,  1.96578057,  1.66902531,  1.24643474])

In [35]:
indices = np.argpartition(arr, 3)
indices

array([16, 11,  3,  2, 17, 19,  0,  7,  8,  1, 10,  6, 12, 13, 14, 15,  5,
        4, 18,  9])

In [36]:
arr.take(indices)

array([-2.00163731, -1.29622111, -0.5557303 , -0.51943872, -0.37184254,
       -0.43856974, -0.20470766,  0.28174615,  0.76902257,  0.47894334,
        1.00718936,  0.09290788,  0.27499163,  0.22891288,  1.35291684,
        0.88642934,  1.39340583,  1.96578057,  1.66902531,  1.24643474])

In [37]:
# numpy.searchsorted: Finding Elements in a Sorted Array

arr = np.array([0, 1, 7, 12, 15])
arr.searchsorted(9)

np.int64(3)

In [38]:
arr.searchsorted([0, 8, 11, 16])

array([0, 3, 3, 5])

In [41]:
arr = np.array([0, 0, 0, 1, 1, 1, 1])
arr.searchsorted([0, 1])

array([0, 3])

In [42]:
arr.searchsorted([0, 1], side='right')

array([3, 7])

In [43]:
data = np.floor(np.random.uniform(0, 10000, size=50))
bins = np.array([0, 100, 1000, 5000, 10000])
data

array([9940., 6768., 7908., 1709.,  268., 8003., 9037.,  246., 4917.,
       5262., 5963.,  519., 8950., 7282., 8183., 5002., 8101.,  959.,
       2189., 2587., 4681., 4593., 7095., 1780., 5314., 1677., 7688.,
       9281., 6094., 1501., 4896., 3773., 8486., 9110., 3838., 3154.,
       5683., 1878., 1258., 6875., 7996., 5735., 9732., 6340., 8884.,
       4954., 3516., 7142., 5039., 2256.])

In [44]:
labels = bins.searchsorted(data)
labels

array([4, 4, 4, 3, 2, 4, 4, 2, 3, 4, 4, 2, 4, 4, 4, 4, 4, 2, 3, 3, 3, 3,
       4, 3, 4, 3, 4, 4, 4, 3, 3, 3, 4, 4, 3, 3, 4, 3, 3, 4, 4, 4, 4, 4,
       4, 3, 3, 4, 4, 3])

In [45]:
import pandas as pd

pd.Series(data).groupby(labels).mean()

2     498.000000
3    3064.277778
4    7389.035714
dtype: float64

A.7 Writing Fast NumPy Functions with Numba

In [2]:
import numpy as np


def mean_distance(x, y):
    """Average of the element-wise differences ``x[i] - y[i]``.

    Iterates over every index of ``x`` in a plain Python loop, accumulating
    the signed difference with the matching element of ``y``, then divides
    by the number of elements visited.
    """
    total = 0.0
    n_terms = 0
    for idx in range(len(x)):
        delta = x[idx] - y[idx]
        total += delta
        n_terms += 1
    return total / n_terms

x = np.random.randn(10000000)
y = np.random.randn(10000000)

%timeit mean_distance(x, y)

1.03 s ± 37.9 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [3]:
%timeit (x - y).mean()

7.2 ms ± 40.5 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [4]:
import numba as nb


numba_mean_distance = nb.jit(mean_distance)

@nb.jit
def nb_mean_distance(x, y):
    """Average of ``x[i] - y[i]`` over every index of ``x``.

    Same explicit accumulation loop as the pure-Python ``mean_distance``,
    declared under the ``nb.jit`` decorator.
    """
    total = 0.0
    n_terms = 0
    for idx in range(len(x)):
        delta = x[idx] - y[idx]
        total += delta
        n_terms += 1
    return total / n_terms

%timeit numba_mean_distance(x, y)

5.48 ms ± 36.3 μs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [None]:
from numba import float64, njit


@njit(float64(float64[:], float64[:]))
def mean_distance(x, y):
    """Mean of the element-wise difference ``x - y``.

    The decorator signature declares two one-dimensional ``float64`` arrays
    as inputs and a ``float64`` scalar as the result.
    """
    differences = x - y
    return differences.mean()

In [6]:
# Creating Custom numpy.ufunc Objects with Numba

from numba import vectorize


@vectorize
def nb_add(x, y):
    """Scalar kernel returning ``x + y``; wrapped by ``numba.vectorize``."""
    total = x + y
    return total

x = np.arange(10)
nb_add(x, x)

array([ 0,  2,  4,  6,  8, 10, 12, 14, 16, 18])

In [8]:
nb_add.accumulate(x, 0)

array([ 0,  1,  3,  6, 10, 15, 21, 28, 36, 45])

A.8 Advanced Array Input and Output

In [9]:
# Memory-Mapped Files

mmap = np.memmap('mymmap', dtype='float64', mode='w+', shape=(10000, 10000))
mmap


memmap([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]], shape=(10000, 10000))

In [10]:
section = mmap[:5]
section[:] = np.random.randn(5, 10000)
mmap.flush()
mmap

memmap([[-0.75762179,  0.74919615,  1.93418673, ..., -0.24280117,
          0.15099582,  1.21931092],
        [-0.33750775,  0.41992974,  0.97054936, ..., -1.0241729 ,
         -0.15088532, -0.64313819],
        [-0.97087318, -0.27480499,  1.05240717, ...,  0.30605841,
         -1.09217398, -0.01821762],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]], shape=(10000, 10000))

In [11]:
del mmap

In [12]:
mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))
mmap

memmap([[-0.75762179,  0.74919615,  1.93418673, ..., -0.24280117,
          0.15099582,  1.21931092],
        [-0.33750775,  0.41992974,  0.97054936, ..., -1.0241729 ,
         -0.15088532, -0.64313819],
        [-0.97087318, -0.27480499,  1.05240717, ...,  0.30605841,
         -1.09217398, -0.01821762],
        ...,
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 0.        ,  0.        ,  0.        , ...,  0.        ,
          0.        ,  0.        ]], shape=(10000, 10000))

A.9 Performance Tips

In [14]:
# The Importance of Contiguous Memory

arr_c = np.ones((1000, 1000), order='C')
arr_f = np.ones((1000, 1000), order='F')
arr_c.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [15]:
arr_f.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [16]:
arr_f.flags.f_contiguous

True

In [17]:
%timeit arr_c.sum(1)

118 μs ± 260 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [18]:
%timeit arr_f.sum(1)

126 μs ± 351 ns per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [19]:
arr_f.copy('C').flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False

In [20]:
arr_c[:50].flags.contiguous

True

In [21]:
arr_c[:, :50].flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False