A.1 ndarray Object Internals

In [1]:
import numpy as np

np.ones((10, 5)).shape

(10, 5)

In [2]:
np.ones((3, 4, 5), dtype=np.float64).strides

(160, 40, 8)

In [4]:
# NumPy dtype Hierarchy

ints = np.ones(10, dtype=np.uint16)
floats = np.ones(10, dtype=np.float32)
np.issubdtype(ints.dtype, np.integer)



True

In [5]:
np.issubdtype(floats.dtype, np.floating)

True

In [6]:
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

In [7]:
np.issubdtype(ints.dtype, np.number)

True

A.2 Advanced Array Manipulation

In [None]:
# Reshaping Arrays

arr = np.arange(8)
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [9]:
arr.reshape((4, 2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [10]:
arr.reshape((4, 2)).reshape((2, 4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [11]:
arr = np.arange(15)
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [13]:
other_arr = np.ones((3, 5))
other_arr.shape

(3, 5)

In [14]:
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [15]:
arr = np.arange(15).reshape((5, 3))
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [16]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [17]:
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [18]:
# C Versus Fortran Order

arr = np.arange(12).reshape((3, 4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [19]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [20]:
arr.ravel('F')

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

In [22]:
# Concatenating and Splitting Arrays

arr1 = np.array([[1, 2, 3], [4, 5, 6]])
arr2 = np.array([[7, 8, 9], [10, 11, 12]])
np.concatenate([arr1, arr2], axis=0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [23]:
np.concatenate([arr1, arr2], axis=1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [24]:
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [25]:
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [26]:
arr = np.random.randn(5, 2)
arr

array([[-0.17436422,  0.38463187],
       [ 0.77542162, -0.79090572],
       [ 0.40936218, -0.47497652],
       [-1.72991043,  0.0289211 ],
       [ 0.25482025,  1.05906761]])

In [27]:
first, second, third = np.split(arr, [1, 3])
first

array([[-0.17436422,  0.38463187]])

In [28]:
second

array([[ 0.77542162, -0.79090572],
       [ 0.40936218, -0.47497652]])

In [29]:
third

array([[-1.72991043,  0.0289211 ],
       [ 0.25482025,  1.05906761]])

In [30]:
arr = np.arange(6)
arr1 = arr.reshape((3, 2))
arr2 = np.random.randn(3, 2)
np.r_[arr1, arr2]

array([[ 0.        ,  1.        ],
       [ 2.        ,  3.        ],
       [ 4.        ,  5.        ],
       [-1.5938487 , -0.27839344],
       [ 0.23748079, -0.26041985],
       [-0.88342441, -1.65434689]])

In [31]:
np.c_[np.r_[arr1, arr2], arr]

array([[ 0.        ,  1.        ,  0.        ],
       [ 2.        ,  3.        ,  1.        ],
       [ 4.        ,  5.        ,  2.        ],
       [-1.5938487 , -0.27839344,  3.        ],
       [ 0.23748079, -0.26041985,  4.        ],
       [-0.88342441, -1.65434689,  5.        ]])

In [32]:
np.c_[1:6, -10:-5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

In [33]:
# Repeating Elements: tile and repeat

arr = np.arange(3)
arr

array([0, 1, 2])

In [34]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [35]:
arr.repeat([2, 3, 4])


array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [37]:
arr = np.random.randn(2, 2)
arr

array([[ 0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736]])

In [38]:
arr.repeat(2, axis=0)

array([[ 0.81930283, -1.50233923],
       [ 0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736],
       [ 0.13100958,  1.38094736]])

In [39]:
arr.repeat([2, 3], axis=0)

array([[ 0.81930283, -1.50233923],
       [ 0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736],
       [ 0.13100958,  1.38094736],
       [ 0.13100958,  1.38094736]])

In [40]:
arr.repeat([2, 3], axis=1)

array([[ 0.81930283,  0.81930283, -1.50233923, -1.50233923, -1.50233923],
       [ 0.13100958,  0.13100958,  1.38094736,  1.38094736,  1.38094736]])

In [41]:
arr

array([[ 0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736]])

In [42]:
np.tile(arr, (2, 1))

array([[ 0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736],
       [ 0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736]])

In [43]:
np.tile(arr, (3, 2))

array([[ 0.81930283, -1.50233923,  0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736,  0.13100958,  1.38094736],
       [ 0.81930283, -1.50233923,  0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736,  0.13100958,  1.38094736],
       [ 0.81930283, -1.50233923,  0.81930283, -1.50233923],
       [ 0.13100958,  1.38094736,  0.13100958,  1.38094736]])

In [44]:
# Fancy Indexing Equivalents: take and put

arr = np.arange(10) * 100
inds = [7, 1, 2, 6]
arr[inds]

array([700, 100, 200, 600])

In [45]:
arr.take(inds)

array([700, 100, 200, 600])

In [47]:
arr.put(inds, 42)
arr

array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [48]:
inds = [2, 0, 2, 1]
arr = np.random.randn(2, 4)
arr

array([[-0.6050039 , -0.63999527, -0.25797005, -0.98507493],
       [ 0.62629298,  0.39179615, -1.42568854,  0.07293799]])

In [49]:
arr.take(inds, axis=1)

array([[-0.25797005, -0.6050039 , -0.25797005, -0.63999527],
       [-1.42568854,  0.62629298, -1.42568854,  0.39179615]])

A.3 Broadcasting

In [2]:
import numpy as np


arr = np.arange(5)
arr

array([0, 1, 2, 3, 4])

In [3]:
arr * 4

array([ 0,  4,  8, 12, 16])

In [4]:
arr = np.random.randn(4, 3)
arr.mean(0)

array([-0.05727587, -0.17194382, -0.00195156])

In [6]:
demeaned = arr - arr.mean(0)
demeaned

array([[-1.19041479,  2.05989947,  0.20173754],
       [ 0.07391815, -0.83229166, -0.6603563 ],
       [ 0.4827489 , -0.43622634,  1.64967486],
       [ 0.63374775, -0.79138146, -1.1910561 ]])

In [7]:
demeaned.mean(0)

array([ 0.00000000e+00, -2.77555756e-17,  0.00000000e+00])

In [8]:
arr

array([[-1.24769067,  1.88795565,  0.19978598],
       [ 0.01664227, -1.00423549, -0.66230787],
       [ 0.42547303, -0.60817017,  1.6477233 ],
       [ 0.57647187, -0.96332529, -1.19300767]])

In [9]:
row_means = arr.mean(1)
row_means.shape

(4,)

In [10]:
row_means.reshape(4, 1)

array([[ 0.28001699],
       [-0.54996703],
       [ 0.48834205],
       [-0.52662036]])

In [11]:
demeand = arr - row_means.reshape((4, 1))
demeand.mean(1)

array([-1.85037171e-17, -3.70074342e-17,  7.40148683e-17,  1.11022302e-16])

In [12]:
# Broadcasting Over Other Axes

arr - arr.mean(1)

ValueError: operands could not be broadcast together with shapes (4,3) (4,) 

In [13]:
arr - arr.mean(1).reshape((4,1))

array([[-1.52770765,  1.60793866, -0.08023101],
       [ 0.5666093 , -0.45426846, -0.11234084],
       [-0.06286903, -1.09651222,  1.15938125],
       [ 1.10309223, -0.43670493, -0.66638731]])

In [14]:
# Start from a 4x4 array of zeros.
arr = np.zeros((4, 4))

# Indexing with np.newaxis inserts a new length-1 axis at that position,
# here between the two existing axes.
arr_3d = arr[:, np.newaxis, :]

arr_3d.shape

(4, 1, 4)

In [15]:
arr_1d = np.random.normal(size=3)
arr_1d[:, np.newaxis]

array([[1.00292464],
       [0.09190387],
       [0.66471496]])

In [16]:
arr_1d[np.newaxis, :]

array([[1.00292464, 0.09190387, 0.66471496]])

In [17]:
arr = np.random.randn(3, 4, 5)
depth_means = arr.mean(2)
depth_means

array([[-0.01759972,  0.24255211,  0.14551152,  0.69861989],
       [ 0.00762028,  0.3882635 , -0.5668809 , -1.00681192],
       [ 0.27071336,  0.91649147, -0.02275329,  0.65269149]])

In [18]:
depth_means.shape

(3, 4)

In [19]:
demeand = arr - depth_means[:, :, np.newaxis]
demeand.mean(2)

array([[-8.32667268e-18,  0.00000000e+00,  0.00000000e+00,
         8.88178420e-17],
       [ 2.22044605e-17,  3.88578059e-17,  6.66133815e-17,
         0.00000000e+00],
       [ 8.88178420e-17, -2.22044605e-17,  4.44089210e-17,
        -2.22044605e-17]])

In [None]:
def demean_axis(arr, axis=0):
    """Subtract the mean taken along ``axis`` from ``arr`` via broadcasting.

    Parameters
    ----------
    arr : ndarray
        Input array of any dimensionality (at least 1-D).
    axis : int, default 0
        Axis along which the means are computed and removed.

    Returns
    -------
    ndarray
        Array with the same shape as ``arr`` in which the mean along
        ``axis`` has been subtracted from every element.
    """
    means = arr.mean(axis)

    # ``means`` has one dimension fewer than ``arr``. Build an index that
    # keeps every remaining axis (full slices) and re-inserts a length-1
    # axis where the reduced axis used to be, so the subtraction broadcasts.
    indexer = [slice(None)] * arr.ndim
    indexer[axis] = np.newaxis

    # A multidimensional index must be a tuple: indexing with a list that
    # contains slices/None is rejected by current NumPy releases.
    return arr - means[tuple(indexer)]


In [20]:
# Setting Array Values by Broadcasting


arr = np.zeros((4, 3))
arr[:] = 5
arr

array([[5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.],
       [5., 5., 5.]])

In [21]:
col = np.array([1.28, -0.42, 0.44, 1.6])
arr[:] = col[:, np.newaxis]
arr

array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

In [22]:
arr[:2] = [[-1.37], [0.509]]
arr

array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])