In [400]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

In [401]:
# Notebook-wide display settings.
plt.rc('figure', figsize=(10, 6))  # default size for matplotlib figures
# Keep the current pandas row limit so it can be put back at the end of the notebook.
PREVIOUS_MAX_ROWS = pd.options.display.max_rows
pd.options.display.max_columns = 20
pd.options.display.max_rows = 20
pd.options.display.max_colwidth = 80
# Print NumPy floats with 4 digits of precision and suppress scientific notation.
np.set_printoptions(precision=4, suppress=True)

In [402]:
rng = np.random.default_rng(seed=12345)
rfloat = rng.random()
rfloat

0.22733602246716966

In [403]:
rints = rng.integers(low=0, high=10, size=13)
rints

array([7, 3, 2, 7, 6, 6, 9, 3, 8, 3, 5, 5, 2])

In [404]:
rng.random((3, 3))  # 3x3 array of floats drawn uniformly from the half-open interval [0, 1)

array([[0.6728, 0.9418, 0.2482],
       [0.9489, 0.6672, 0.0959],
       [0.4418, 0.8865, 0.6975]])

In [405]:
np.ones((10, 5))

array([[1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1.]])

In [406]:
np.ones((10, 5)).shape

(10, 5)

In [407]:
# Strides: how many bytes to step in memory to reach the next element along each axis.
# For this float64 (8-byte) array of shape (3, 4, 5) that is (4*5*8, 5*8, 8) = (160, 40, 8).
np.ones((3, 4, 5), dtype=np.float64).strides

(160, 40, 8)

In [408]:
temp = np.ones(10)
temp

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [409]:
np.issubdtype(temp.dtype, np.float64), temp.strides

(True, (8,))

In [410]:
ints = np.ones(10, dtype=np.uint16)
ints

array([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=uint16)

In [411]:
np.issubdtype(ints.dtype, np.integer), ints.strides

(True, (2,))

In [412]:
floats = np.ones(10, dtype=np.float32)
floats

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.], dtype=float32)

In [413]:
np.issubdtype(floats.dtype, np.floating), floats.strides

(True, (4,))

In [414]:
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [415]:
np.issubdtype(ints.dtype, np.number)

True

In [416]:
rng.random(8)

array([0.3265, 0.7339, 0.2201, 0.0816, 0.1599, 0.3401, 0.4652, 0.2664])

In [417]:
rng.random((8, 1))

array([[0.8158],
       [0.1933],
       [0.1295],
       [0.0917],
       [0.5986],
       [0.8547],
       [0.6016],
       [0.932 ]])

In [418]:
rng.random((1, 8))

array([[0.7248, 0.8606, 0.9293, 0.5462, 0.9377, 0.495 , 0.2738, 0.4518]])

In [419]:
# arr = np.arange(8)
arr = rng.random(8)
arr

array([0.665 , 0.3309, 0.9035, 0.2571, 0.3398, 0.2589, 0.3554, 0.005 ])

In [420]:
arr.reshape((4, 2))

array([[0.665 , 0.3309],
       [0.9035, 0.2571],
       [0.3398, 0.2589],
       [0.3554, 0.005 ]])

In [421]:
arr.reshape((4, 2)).reshape((2, 4))

array([[0.665 , 0.3309, 0.9035, 0.2571],
       [0.3398, 0.2589, 0.3554, 0.005 ]])

In [422]:
# arr = np.arange(15)
arr = rng.random(15)
arr.reshape((5, -1))

array([[0.6286, 0.2824, 0.0681],
       [0.6168, 0.1763, 0.3044],
       [0.4409, 0.1502, 0.2179],
       [0.4743, 0.4764, 0.2552],
       [0.2976, 0.2791, 0.2606]])

In [423]:
other_arr = np.ones((3, 5))
other_arr.shape

(3, 5)

In [424]:
arr.reshape(other_arr.shape)

array([[0.6286, 0.2824, 0.0681, 0.6168, 0.1763],
       [0.3044, 0.4409, 0.1502, 0.2179, 0.4743],
       [0.4764, 0.2552, 0.2976, 0.2791, 0.2606]])

In [425]:
# arr = np.arange(15).reshape((5, 3))
arr = rng.random(15).reshape((5, 3))
arr

array([[0.4828, 0.212 , 0.4956],
       [0.2463, 0.8385, 0.1801],
       [0.8622, 0.1783, 0.7505],
       [0.6111, 0.2092, 0.7599],
       [0.2493, 0.0856, 0.6181]])

In [426]:
arr.ravel()  # flattened 1-D array; NumPy copies the data only when it has to (contrast with flatten())

array([0.4828, 0.212 , 0.4956, 0.2463, 0.8385, 0.1801, 0.8622, 0.1783,
       0.7505, 0.6111, 0.2092, 0.7599, 0.2493, 0.0856, 0.6181])

In [427]:
arr.flatten()   # return a copy

array([0.4828, 0.212 , 0.4956, 0.2463, 0.8385, 0.1801, 0.8622, 0.1783,
       0.7505, 0.6111, 0.2092, 0.7599, 0.2493, 0.0856, 0.6181])

In [428]:
# arr = np.arange(12).reshape((3, 4))
arr = rng.random(12).reshape((3, 4))
arr

array([[0.537 , 0.6345, 0.1744, 0.2482],
       [0.6848, 0.0809, 0.8751, 0.4287],
       [0.6184, 0.3131, 0.179 , 0.0097]])

In [429]:
arr.ravel()

array([0.537 , 0.6345, 0.1744, 0.2482, 0.6848, 0.0809, 0.8751, 0.4287,
       0.6184, 0.3131, 0.179 , 0.0097])

In [430]:
arr.ravel('F')  # Fortran-style order, with the first index changing fastest, and the last index changing slowest

array([0.537 , 0.6848, 0.6184, 0.6345, 0.0809, 0.3131, 0.1744, 0.8751,
       0.179 , 0.2482, 0.4287, 0.0097])

In [431]:
# arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr1 = rng.random(6).reshape((2, 3))
# arr1.shape
arr1

array([[0.21  , 0.87  , 0.9728],
       [0.4418, 0.3787, 0.2759]])

In [432]:
# arr2 = np.array([[7, 8, 9], [10, 11, 12]])
arr2 = rng.random(6).reshape((2, 3))
# arr2.shape
arr2

array([[0.9661, 0.0582, 0.4087],
       [0.1686, 0.2401, 0.78  ]])

In [433]:
np.concatenate([arr1, arr2], axis=0)    # axis=0 joins along the rows: two (2, 3) arrays become one (4, 3) array

array([[0.21  , 0.87  , 0.9728],
       [0.4418, 0.3787, 0.2759],
       [0.9661, 0.0582, 0.4087],
       [0.1686, 0.2401, 0.78  ]])

In [434]:
np.concatenate([arr1, arr2], axis=1)

array([[0.21  , 0.87  , 0.9728, 0.9661, 0.0582, 0.4087],
       [0.4418, 0.3787, 0.2759, 0.1686, 0.2401, 0.78  ]])

In [435]:
np.vstack((arr1, arr2))

array([[0.21  , 0.87  , 0.9728],
       [0.4418, 0.3787, 0.2759],
       [0.9661, 0.0582, 0.4087],
       [0.1686, 0.2401, 0.78  ]])

In [436]:
np.hstack((arr1, arr2))

array([[0.21  , 0.87  , 0.9728, 0.9661, 0.0582, 0.4087],
       [0.4418, 0.3787, 0.2759, 0.1686, 0.2401, 0.78  ]])

In [437]:
arr = rng.standard_normal((5, 2))
# arr = rng.standard_exponential((5, 2))
arr


array([[ 1.0638, -0.2752],
       [-1.8533, -0.1243],
       [ 0.785 ,  0.202 ],
       [-0.4281,  1.8483],
       [ 1.9   , -0.0984]])

In [438]:
# Split along axis 0 at row indices 1 and 3, producing rows [0:1], [1:3] and [3:].
first, second, third = np.split(arr, [1, 3])
first

array([[ 1.0638, -0.2752]])

In [439]:
second

array([[-1.8533, -0.1243],
       [ 0.785 ,  0.202 ]])

In [440]:
third

array([[-0.4281,  1.8483],
       [ 1.9   , -0.0984]])

In [441]:
# arr = np.arange(6)
arr = rng.random(6)
arr1 = arr.reshape((3, 2))
arr2 = rng.standard_normal((3, 2))
np.r_[arr1, arr2]

array([[ 0.9472,  0.0274],
       [ 0.9178,  0.1215],
       [ 0.7478,  0.8965],
       [-0.6535, -0.8119],
       [-0.0255,  1.1582],
       [ 0.3005,  0.0531]])

In [442]:
np.c_[np.r_[arr1, arr2], arr]

array([[ 0.9472,  0.0274,  0.9472],
       [ 0.9178,  0.1215,  0.0274],
       [ 0.7478,  0.8965,  0.9178],
       [-0.6535, -0.8119,  0.1215],
       [-0.0255,  1.1582,  0.7478],
       [ 0.3005,  0.0531,  0.8965]])

In [443]:
np.c_[1:6, -10:-5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

In [444]:
arr = np.arange(3)
arr

array([0, 1, 2])

In [445]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [446]:
arr.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [447]:
arr = rng.standard_normal((2, 2))
arr

array([[ 0.2573,  0.0357],
       [ 0.5472, -1.123 ]])

In [448]:
arr.repeat(2, axis=0)

array([[ 0.2573,  0.0357],
       [ 0.2573,  0.0357],
       [ 0.5472, -1.123 ],
       [ 0.5472, -1.123 ]])

In [449]:
arr.repeat([2, 3], axis=0)

array([[ 0.2573,  0.0357],
       [ 0.2573,  0.0357],
       [ 0.5472, -1.123 ],
       [ 0.5472, -1.123 ],
       [ 0.5472, -1.123 ]])

In [450]:
arr.repeat([2, 3], axis=1)

array([[ 0.2573,  0.2573,  0.0357,  0.0357,  0.0357],
       [ 0.5472,  0.5472, -1.123 , -1.123 , -1.123 ]])

In [451]:
arr

array([[ 0.2573,  0.0357],
       [ 0.5472, -1.123 ]])

In [452]:
np.tile(arr, 2)

array([[ 0.2573,  0.0357,  0.2573,  0.0357],
       [ 0.5472, -1.123 ,  0.5472, -1.123 ]])

In [453]:
arr

array([[ 0.2573,  0.0357],
       [ 0.5472, -1.123 ]])

In [454]:
np.tile(arr, (2, 1))

array([[ 0.2573,  0.0357],
       [ 0.5472, -1.123 ],
       [ 0.2573,  0.0357],
       [ 0.5472, -1.123 ]])

In [455]:
np.tile(arr, (3, 2))

array([[ 0.2573,  0.0357,  0.2573,  0.0357],
       [ 0.5472, -1.123 ,  0.5472, -1.123 ],
       [ 0.2573,  0.0357,  0.2573,  0.0357],
       [ 0.5472, -1.123 ,  0.5472, -1.123 ],
       [ 0.2573,  0.0357,  0.2573,  0.0357],
       [ 0.5472, -1.123 ,  0.5472, -1.123 ]])

In [456]:
# arr = np.arange(10) * 100
arr = rng.random(10) * 100
inds = [7, 1, 2, 6]
arr[inds]

array([76.8128, 78.2571, 96.456 , 34.7535])

In [457]:
arr.take(inds)

array([76.8128, 78.2571, 96.456 , 34.7535])

In [458]:
arr.put(inds, 42)
arr

array([48.1228, 42.    , 42.    , 70.7096, 27.3737, 67.0113, 42.    ,
       42.    , 67.5771, 97.7532])

In [459]:
arr.put(inds, [40, 41, 42, 43])
arr

array([48.1228, 41.    , 42.    , 70.7096, 27.3737, 67.0113, 43.    ,
       40.    , 67.5771, 97.7532])

In [460]:
inds = [2, 0, 2, 1]
arr = rng.standard_normal((2, 4))
arr

array([[ 0.8412, -0.7759,  0.4107, -2.7224],
       [-0.6733,  1.2462,  0.7902,  0.1753]])

In [461]:
arr.take(inds, axis=1)

array([[ 0.4107,  0.8412,  0.4107, -0.7759],
       [ 0.7902, -0.6733,  0.7902,  1.2462]])

In [462]:
# Integer data 0..4, matching the recorded outputs of this cell and the next one
# (array([0, 1, 2, 3, 4]) and array([ 0,  4,  8, 12, 16])).
# Alternative: arr = rng.random(5) -- if used, re-run both cells to refresh their outputs.
arr = np.arange(5)
arr

array([0, 1, 2, 3, 4])

In [463]:
arr * 4

array([ 0,  4,  8, 12, 16])

In [464]:
arr = rng.standard_normal((4, 3))
arr.mean(0)

array([-0.0117,  0.1036, -0.6336])

In [465]:
demeaned = arr - arr.mean(0)
demeaned

array([[-0.0176, -1.5231, -0.7264],
       [ 0.2351,  1.6582, -1.5373],
       [ 0.6401,  0.4976,  1.5844],
       [-0.8576, -0.6326,  0.6793]])

In [466]:
demeaned.mean(0)

array([ 0.,  0., -0.])

In [467]:
arr

array([[-0.0293, -1.4195, -1.36  ],
       [ 0.2234,  1.7618, -2.1709],
       [ 0.6285,  0.6012,  0.9508],
       [-0.8692, -0.529 ,  0.0457]])

In [468]:
row_means = arr.mean(1)
row_means.shape

(4,)

In [469]:
row_means.reshape((4, 1))

array([[-0.9363],
       [-0.0619],
       [ 0.7268],
       [-0.4509]])

In [470]:
demeaned = arr - row_means.reshape((4, 1))
demeaned.mean(1)

array([-0.,  0.,  0., -0.])

In [471]:
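# arr - arr.mean(1)     # ValueError: operands could not be broadcast together with shapes (4,3) (4,)
# The (4,) vector of row means has to be reshaped to (4, 1) before it can be subtracted from each row.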

In [472]:
temp = arr.mean(1)
temp = temp.reshape(-1, 1)
arr - temp

array([[ 0.907 , -0.4833, -0.4237],
       [ 0.2853,  1.8237, -2.109 ],
       [-0.0983, -0.1256,  0.2239],
       [-0.4184, -0.0782,  0.4965]])

In [473]:
arr - arr.mean(1).reshape((4, 1))

array([[ 0.907 , -0.4833, -0.4237],
       [ 0.2853,  1.8237, -2.109 ],
       [-0.0983, -0.1256,  0.2239],
       [-0.4184, -0.0782,  0.4965]])

In [474]:
arr = np.zeros((4, 4))
arr_3d = arr[:, np.newaxis, :]
arr_3d.shape

(4, 1, 4)

In [475]:
arr_1d = rng.standard_normal(3)
arr_1d[:, np.newaxis]

array([[-1.0276],
       [-1.2293],
       [-0.8834]])

In [476]:
arr_1d[np.newaxis, :]

array([[-1.0276, -1.2293, -0.8834]])

In [477]:
arr = rng.standard_normal((3, 4, 5))
depth_means = arr.mean(2)
depth_means

array([[-0.0656,  0.2397,  0.6312,  0.4959],
       [-0.141 ,  0.3275,  0.3521, -0.8672],
       [ 0.4675,  0.4946, -0.8301, -0.159 ]])

In [478]:
depth_means.shape

(3, 4)

In [479]:
demeaned = arr - depth_means[:, :, np.newaxis]
demeaned.mean(2)

array([[ 0.,  0., -0.,  0.],
       [-0., -0.,  0., -0.],
       [ 0.,  0.,  0., -0.]])

In [480]:
arr = np.zeros((4, 3))
arr[:] = 5
arr

array([[5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

In [481]:
col = np.array([1.28, -0.42, 0.44, 1.6])
arr[:] = col[:, np.newaxis]
arr

array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

In [482]:
arr[:2] = [[-1.37], [0.509]]
arr

array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])

In [483]:
arr = np.arange(10)
np.add.reduce(arr)

45

In [484]:
arr.sum()

45

In [485]:
my_rng = np.random.default_rng(12346)  # for reproducibility
arr = my_rng.standard_normal((5, 5))
arr

array([[-0.9039,  0.1571,  0.8976, -0.7622, -0.1763],
       [ 0.053 , -1.6284, -0.1775,  1.9636,  1.7813],
       [-0.8797, -1.6985, -1.8189,  0.119 , -0.4441],
       [ 0.7691, -0.0343,  0.3925,  0.7589, -0.0705],
       [ 1.0498,  1.0297, -0.4201,  0.7863,  0.9612]])

In [486]:
arr[::2].sort(1) # sort a few rows

In [487]:
arr[:, :-1] < arr[:, 1:]

array([[ True,  True,  True,  True],
       [False,  True,  True, False],
       [ True,  True,  True,  True],
       [False,  True,  True, False],
       [ True,  True,  True,  True]])

In [488]:
np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis=1)

array([ True, False,  True, False,  True])

In [489]:
arr = np.arange(15).reshape((3, 5))
np.add.accumulate(arr, axis=1)

array([[ 0,  1,  3,  6, 10],
       [ 5, 11, 18, 26, 35],
       [10, 21, 33, 46, 60]])

In [490]:
arr = np.arange(3).repeat([1, 2, 2])
arr

array([0, 1, 1, 2, 2])

In [491]:
np.multiply.outer(arr, np.arange(5))

array([[0, 0, 0, 0, 0],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 2, 4, 6, 8],
       [0, 2, 4, 6, 8]])

In [492]:
x, y = rng.standard_normal((3, 4)), rng.standard_normal(5)
result = np.subtract.outer(x, y)
result.shape

(3, 4, 5)

In [493]:
arr = np.arange(10)
np.add.reduceat(arr, [0, 5, 8])

array([10, 18, 17])

In [494]:
arr = np.multiply.outer(np.arange(4), np.arange(5))
arr

array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12]])

In [495]:
np.add.reduceat(arr, [0, 2, 4], axis=1)

array([[ 0,  0,  0],
       [ 1,  5,  4],
       [ 2, 10,  8],
       [ 3, 15, 12]])

In [496]:
def add_elements(x, y):
    """Return ``x + y`` for a single pair of operands.

    This is the scalar kernel that the notebook wraps with ``np.frompyfunc``
    and ``np.vectorize`` to build element-wise functions.
    """
    total = x + y
    return total
add_them = np.frompyfunc(add_elements, 2, 1)
add_them(np.arange(8), np.arange(8))

array([0, 2, 4, 6, 8, 10, 12, 14], dtype=object)

In [497]:
add_them = np.vectorize(add_elements, otypes=[np.float64])
add_them(np.arange(8), np.arange(8))

array([ 0.,  2.,  4.,  6.,  8., 10., 12., 14.])

In [498]:
arr = rng.standard_normal(10000)
%timeit add_them(arr, arr)

1.06 ms ± 17.9 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [499]:
%timeit np.add(arr, arr)

1.63 µs ± 56.3 ns per loop (mean ± std. dev. of 7 runs, 1,000,000 loops each)


In [500]:
dtype = [('x', np.float64), ('y', np.int32)]
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype=dtype)
sarr

array([(1.5   ,  6), (3.1416, -2)], dtype=[('x', '<f8'), ('y', '<i4')])

In [501]:
sarr[0]

(1.5, 6)

In [502]:
sarr[0]['y']

6

In [503]:
sarr['x']

array([1.5   , 3.1416])

In [504]:
dtype = [('x', np.int64, 3), ('y', np.int32)]
arr = np.zeros(4, dtype=dtype)
arr

array([([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0), ([0, 0, 0], 0)],
      dtype=[('x', '<i8', (3,)), ('y', '<i4')])

In [505]:
arr[0]['x']

array([0, 0, 0])

In [506]:
arr['x']

array([[0, 0, 0],
       [0, 0, 0],
       [0, 0, 0],
       [0, 0, 0]])

In [507]:
dtype = [('x', [('a', 'f8'), ('b', 'f4')]), ('y', np.int32)]
data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)
data['x']

array([(1., 2.), (3., 4.)], dtype=[('a', '<f8'), ('b', '<f4')])

In [508]:
data['y']

array([5, 6], dtype=int32)

In [509]:
data['x']['a']

array([1., 3.])

In [510]:
arr = rng.standard_normal(6)
arr.sort()

In [511]:
arr

array([-1.8416, -1.2074, -0.7067,  0.085 ,  0.7443,  2.0162])

In [512]:
arr = rng.standard_normal((3, 5))
arr

array([[-0.1426, -0.114 ,  0.847 , -0.4036,  0.0754],
       [ 1.0137, -0.2478, -2.1987, -0.6476, -0.6764],
       [-0.9878,  0.4887, -0.0325, -0.887 , -0.6754]])

In [513]:
arr[:, 0].sort()  # Sort first column values in place
arr

array([[-0.9878, -0.114 ,  0.847 , -0.4036,  0.0754],
       [-0.1426, -0.2478, -2.1987, -0.6476, -0.6764],
       [ 1.0137,  0.4887, -0.0325, -0.887 , -0.6754]])

In [514]:
arr = rng.standard_normal(5)
arr

array([-1.5187, -0.6441,  1.7107, -1.7836,  1.7246])

In [515]:
np.sort(arr)  # returns a sorted copy; it is discarded here because it is not the cell's last expression
arr  # still in its original order: np.sort does not modify its input

array([-1.5187, -0.6441,  1.7107, -1.7836,  1.7246])

In [516]:
arr = rng.standard_normal((3, 5))
arr

array([[-0.1718, -0.3261, -0.6738, -1.3324,  0.48  ],
       [-0.4428, -0.7791, -0.1936,  1.369 , -0.5886],
       [-0.2925,  1.1759, -1.1339,  0.6946,  2.2397]])

In [517]:
arr.sort(axis=1)
arr

array([[-1.3324, -0.6738, -0.3261, -0.1718,  0.48  ],
       [-0.7791, -0.5886, -0.4428, -0.1936,  1.369 ],
       [-1.1339, -0.2925,  0.6946,  1.1759,  2.2397]])

In [518]:
arr[:, ::-1]

array([[ 0.48  , -0.1718, -0.3261, -0.6738, -1.3324],
       [ 1.369 , -0.1936, -0.4428, -0.5886, -0.7791],
       [ 2.2397,  1.1759,  0.6946, -0.2925, -1.1339]])

In [519]:
values = np.array([5, 0, 1, 3, 2])
indexer = values.argsort()
indexer

array([1, 2, 4, 3, 0])

In [520]:
values[indexer]

array([0, 1, 2, 3, 5])

In [521]:
arr = rng.standard_normal((3, 5))
arr[0] = values
arr

array([[ 5.    ,  0.    ,  1.    ,  3.    ,  2.    ],
       [ 0.3815,  0.1558, -0.9755,  1.031 ,  0.3377],
       [ 0.3342,  0.3567,  0.3555,  0.1248, -0.1526]])

In [522]:
arr[:, arr[0].argsort()]

array([[ 0.    ,  1.    ,  2.    ,  3.    ,  5.    ],
       [ 0.1558, -0.9755,  0.3377,  1.031 ,  0.3815],
       [ 0.3567,  0.3555, -0.1526,  0.1248,  0.3342]])

In [523]:
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])
sorter = np.lexsort((first_name, last_name))
sorter

array([1, 2, 3, 0, 4])

In [524]:
list(zip(last_name[sorter], first_name[sorter]))

[('Arnold', 'Jane'),
 ('Arnold', 'Steve'),
 ('Jones', 'Bill'),
 ('Jones', 'Bob'),
 ('Walters', 'Barbara')]

In [525]:
values = np.array(['2:first', '2:second', '1:first', '1:second',
                   '1:third'])
key = np.array([2, 2, 1, 1, 1])
# A stable sort keeps elements with equal keys in their original relative order,
# so within each key the labels stay first, second, third.
# kind='stable' is the documented spelling; 'mergesort' is the legacy name for the same sort.
indexer = key.argsort(kind='stable')
indexer

array([2, 3, 4, 0, 1])

In [526]:
values.take(indexer)

array(['1:first', '1:second', '1:third', '2:first', '2:second'],
      dtype='<U8')

In [527]:
rng = np.random.default_rng(12345)
arr = rng.standard_normal(20)
arr

array([-1.4238,  1.2637, -0.8707, -0.2592, -0.0753, -0.7409, -1.3678,
        0.6489,  0.3611, -1.9529,  2.3474,  0.9685, -0.7594,  0.9022,
       -0.467 , -0.0607,  0.7888, -1.2567,  0.5759,  1.399 ])

In [528]:
np.partition(arr, 3)

array([-1.9529, -1.4238, -1.3678, -1.2567, -0.8707, -0.7594, -0.7409,
       -0.0607,  0.3611, -0.0753, -0.2592, -0.467 ,  0.5759,  0.9022,
        0.9685,  0.6489,  0.7888,  1.2637,  1.399 ,  2.3474])

In [529]:
indices = np.argpartition(arr, 3)
indices

array([ 9,  0,  6, 17,  2, 12,  5, 15,  8,  4,  3, 14, 18, 13, 11,  7, 16,
        1, 19, 10])

In [530]:
arr.take(indices)

array([-1.9529, -1.4238, -1.3678, -1.2567, -0.8707, -0.7594, -0.7409,
       -0.0607,  0.3611, -0.0753, -0.2592, -0.467 ,  0.5759,  0.9022,
        0.9685,  0.6489,  0.7888,  1.2637,  1.399 ,  2.3474])

In [531]:
arr = np.array([0, 1, 7, 12, 15])
arr.searchsorted(9)

3

In [532]:
arr.searchsorted([0, 8, 11, 16])

array([0, 3, 3, 5])

In [533]:
arr = np.array([0, 0, 0, 1, 1, 1, 1])
arr.searchsorted([0, 1])

array([0, 3])

In [534]:
arr.searchsorted([0, 1], side='right')

array([3, 7])

In [535]:
data = np.floor(rng.uniform(0, 10000, size=50))
bins = np.array([0, 100, 1000, 5000, 10000])
data

array([ 815., 1598., 3401., 4651., 2664., 8157., 1932., 1294.,  916.,
       5985., 8547., 6016., 9319., 7247., 8605., 9293., 5461., 9376.,
       4949., 2737., 4517., 6650., 3308., 9034., 2570., 3398., 2588.,
       3554.,   50., 6286., 2823.,  680., 6168., 1763., 3043., 4408.,
       1502., 2179., 4743., 4763., 2552., 2975., 2790., 2605., 4827.,
       2119., 4956., 2462., 8384., 1801.])

In [536]:
labels = bins.searchsorted(data)
labels

array([2, 3, 3, 3, 3, 4, 3, 3, 2, 4, 4, 4, 4, 4, 4, 4, 4, 4, 3, 3, 3, 4,
       3, 4, 3, 3, 3, 3, 1, 4, 3, 2, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
       3, 3, 3, 3, 4, 3])

In [537]:
pd.Series(data).groupby(labels).mean()

1      50.000000
2     803.666667
3    3079.741935
4    7635.200000
dtype: float64

In [538]:
import numpy as np

def mean_distance(x, y):
    """Return the mean of the element-wise differences ``x[i] - y[i]``.

    Iterates over every position of ``x`` and looks up the matching position
    in ``y``; any elements of ``y`` beyond ``len(x)`` are ignored.

    Raises
    ------
    IndexError
        If ``y`` has fewer elements than ``x``.
    ZeroDivisionError
        If ``x`` is empty.
    """
    total = 0.0
    n_terms = 0
    # Accumulate sequentially, one difference at a time, in index order.
    for idx, x_val in enumerate(x):
        total += x_val - y[idx]
        n_terms += 1
    return total / n_terms

In [539]:
# Create a disk-backed array in the file 'mymmap'; mode 'w+' creates the file or overwrites an existing one.
# 10000 x 10000 float64 values take 10000 * 10000 * 8 bytes, i.e. roughly 800 MB on disk.
mmap = np.memmap('mymmap', dtype='float64', mode='w+',
                 shape=(10000, 10000))
mmap

memmap([[0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        ...,
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.],
        [0., 0., 0., ..., 0., 0., 0.]])

In [540]:
section = mmap[:5]

In [541]:
section[:] = rng.standard_normal((5, 10000))
mmap.flush()
mmap

memmap([[-0.9074, -1.0954,  0.0071, ...,  0.2753, -1.1641,  0.8521],
        [-0.0103, -0.0646, -1.0615, ..., -1.1003,  0.2505,  0.5832],
        [ 0.4583,  1.2992,  1.7137, ...,  0.8691, -0.7889, -0.2431],
        ...,
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ]])

In [542]:
del mmap

In [543]:
# Re-open the existing file. dtype and shape are passed again, matching the values used when it was created.
# NOTE(review): presumably required because the file holds only raw bytes with no metadata -- confirm against the np.memmap docs.
mmap = np.memmap('mymmap', dtype='float64', shape=(10000, 10000))
mmap

memmap([[-0.9074, -1.0954,  0.0071, ...,  0.2753, -1.1641,  0.8521],
        [-0.0103, -0.0646, -1.0615, ..., -1.1003,  0.2505,  0.5832],
        [ 0.4583,  1.2992,  1.7137, ...,  0.8691, -0.7889, -0.2431],
        ...,
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ],
        [ 0.    ,  0.    ,  0.    , ...,  0.    ,  0.    ,  0.    ]])

In [544]:
%xdel mmap
!rm mymmap

In [545]:
arr_c = np.ones((100, 10000), order='C')
arr_f = np.ones((100, 10000), order='F')
arr_c.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [546]:
arr_f.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [547]:
arr_f.flags.f_contiguous

True

In [548]:
%timeit arr_c.sum(1)

161 µs ± 1.42 µs per loop (mean ± std. dev. of 7 runs, 10,000 loops each)


In [549]:
%timeit arr_f.sum(1)

281 µs ± 112 µs per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [550]:
arr_f.copy('C').flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [551]:
arr_c[:50].flags.contiguous

True

In [552]:
arr_c[:, :50].flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  WRITEBACKIFCOPY : False
  UPDATEIFCOPY : False

In [553]:
%xdel arr_c

In [554]:
%xdel arr_f

In [555]:
pd.options.display.max_rows = PREVIOUS_MAX_ROWS