In [1]:
import numpy as np

# ndarray对象的内部机理

In [2]:
np.ones((10, 5)).shape

(10, 5)

In [3]:
np.ones((3, 4, 5), dtype = np.float64).strides

(160, 40, 8)

## NumPy数据类型体系

In [4]:
ints = np.ones(10, dtype = np.uint16)

In [5]:
floats = np.ones(10, dtype = np.float32)

In [6]:
np.issubdtype(ints.dtype, np.integer)

True

In [7]:
np.issubdtype(floats.dtype, np.floating)

True

In [8]:
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

# 高级数组操作

## 数组重塑

In [9]:
arr = np.arange(8)

In [10]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [11]:
arr.reshape((4, 2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [12]:
arr.reshape((4, 2)).reshape((2, 4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [13]:
arr = np.arange(15)

In [14]:
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [15]:
other_arr = np.ones((3, 5))

In [16]:
other_arr.shape

(3, 5)

In [17]:
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [18]:
arr = np.arange(15).reshape((5, 3))

In [19]:
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [20]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [21]:
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

## C和Fortran顺序

In [22]:
arr = np.arange(12).reshape((3, 4))

In [23]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [24]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [25]:
arr.ravel('F')

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

## 数组的合并和拆分

In [26]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])

In [27]:
arr2 = np.array([[7, 8, 9], [10, 11, 12]])

In [28]:
np.concatenate([arr1, arr2], axis = 0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [29]:
np.concatenate([arr1, arr2], axis = 1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [30]:
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [31]:
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [32]:
from numpy.random import randn

In [33]:
arr = randn(5, 2)

In [34]:
arr

array([[-0.08986973,  0.96264015],
       [-0.08953003,  1.67045694],
       [ 0.11025888,  1.78049663],
       [ 0.87316627, -1.13870077],
       [ 0.75539323,  0.42927654]])

In [35]:
first, second, third = np.split(arr, [1, 3])

In [36]:
first

array([[-0.08986973,  0.96264015]])

In [37]:
second

array([[-0.08953003,  1.67045694],
       [ 0.11025888,  1.78049663]])

In [38]:
third

array([[ 0.87316627, -1.13870077],
       [ 0.75539323,  0.42927654]])

### 堆叠辅助类: r_和c_

In [39]:
arr = np.arange(6)

In [40]:
arr1 = arr.reshape((3, 2))

In [41]:
arr2 = randn(3, 2)

In [42]:
np.r_[arr1, arr2]

array([[ 0.        ,  1.        ],
       [ 2.        ,  3.        ],
       [ 4.        ,  5.        ],
       [ 0.4260782 , -0.44830484],
       [ 0.36419918,  1.02607569],
       [ 0.29480238,  0.26431855]])

In [43]:
np.c_[np.r_[arr1, arr2], arr]

array([[ 0.        ,  1.        ,  0.        ],
       [ 2.        ,  3.        ,  1.        ],
       [ 4.        ,  5.        ,  2.        ],
       [ 0.4260782 , -0.44830484,  3.        ],
       [ 0.36419918,  1.02607569,  4.        ],
       [ 0.29480238,  0.26431855,  5.        ]])

In [44]:
np.c_[1: 6, -10: -5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

## 元素的重复操作: tile和repeat

In [45]:
arr = np.arange(3)

In [46]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [47]:
arr.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [48]:
arr = randn(2, 2)

In [49]:
arr

array([[-1.34629884,  1.37618352],
       [-0.6421091 , -0.34660434]])

In [50]:
arr.repeat(2, axis = 0)

array([[-1.34629884,  1.37618352],
       [-1.34629884,  1.37618352],
       [-0.6421091 , -0.34660434],
       [-0.6421091 , -0.34660434]])

In [51]:
arr.repeat([2, 3], axis = 0)

array([[-1.34629884,  1.37618352],
       [-1.34629884,  1.37618352],
       [-0.6421091 , -0.34660434],
       [-0.6421091 , -0.34660434],
       [-0.6421091 , -0.34660434]])

In [52]:
arr.repeat([2, 3], axis = 1)

array([[-1.34629884, -1.34629884,  1.37618352,  1.37618352,  1.37618352],
       [-0.6421091 , -0.6421091 , -0.34660434, -0.34660434, -0.34660434]])

In [53]:
arr

array([[-1.34629884,  1.37618352],
       [-0.6421091 , -0.34660434]])

In [54]:
np.tile(arr, 2)

array([[-1.34629884,  1.37618352, -1.34629884,  1.37618352],
       [-0.6421091 , -0.34660434, -0.6421091 , -0.34660434]])

In [55]:
arr

array([[-1.34629884,  1.37618352],
       [-0.6421091 , -0.34660434]])

In [56]:
np.tile(arr, (2, 1))

array([[-1.34629884,  1.37618352],
       [-0.6421091 , -0.34660434],
       [-1.34629884,  1.37618352],
       [-0.6421091 , -0.34660434]])

In [57]:
np.tile(arr, (3, 2))

array([[-1.34629884,  1.37618352, -1.34629884,  1.37618352],
       [-0.6421091 , -0.34660434, -0.6421091 , -0.34660434],
       [-1.34629884,  1.37618352, -1.34629884,  1.37618352],
       [-0.6421091 , -0.34660434, -0.6421091 , -0.34660434],
       [-1.34629884,  1.37618352, -1.34629884,  1.37618352],
       [-0.6421091 , -0.34660434, -0.6421091 , -0.34660434]])

## 花式索引的等价函数: take和put

In [58]:
arr = np.arange(10) * 100

In [59]:
inds = [7, 1, 2, 6]

In [60]:
arr[inds]

array([700, 100, 200, 600])

In [61]:
arr.take(inds)

array([700, 100, 200, 600])

In [62]:
arr.put(inds, 42)

In [63]:
arr

array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [64]:
arr.put(inds, [40, 41, 42, 43])

In [65]:
arr

array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [66]:
inds = [2, 0, 2, 1]

In [67]:
arr = randn(2, 4)

In [68]:
arr

array([[ 0.33736227, -0.77535451, -0.69366701,  2.55819794],
       [-1.4988324 ,  1.69012791, -0.6396865 ,  1.44585923]])

In [69]:
arr.take(inds, axis = 1)

array([[-0.69366701,  0.33736227, -0.69366701, -0.77535451],
       [-0.6396865 , -1.4988324 , -0.6396865 ,  1.69012791]])

In [70]:
# Performance comparison: time fancy indexing (arr[inds]) against
# arr.take(inds, axis = 0) when selecting the same 500 randomly chosen
# rows from a 1000 x 50 array.
arr = randn(1000, 50)

inds = np.random.permutation(1000)[:500]

%timeit arr[inds]

%timeit arr.take(inds, axis = 0)

The slowest run took 5.76 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 21.7 µs per loop
The slowest run took 6.10 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 25.9 µs per loop


# 广播

In [71]:
arr = np.arange(5)

In [72]:
arr

array([0, 1, 2, 3, 4])

In [73]:
arr * 4

array([ 0,  4,  8, 12, 16])

In [74]:
arr = randn(4, 3)

In [75]:
arr.mean(0)

array([-0.05955176,  0.18118935, -0.22583909])

In [76]:
demeaned = arr - arr.mean(0)

In [77]:
demeaned

array([[-0.78753346, -0.36143343,  0.2351134 ],
       [ 1.03286281,  1.2466792 ,  0.70599548],
       [-0.32877119,  0.66584418, -0.12670401],
       [ 0.08344183, -1.55108996, -0.81440487]])

In [78]:
demeaned.mean(0)

array([  3.46944695e-18,  -5.55111512e-17,   0.00000000e+00])

In [79]:
arr

array([[-0.84708522, -0.18024408,  0.00927431],
       [ 0.97331105,  1.42786856,  0.48015639],
       [-0.38832295,  0.84703354, -0.3525431 ],
       [ 0.02389007, -1.3699006 , -1.04024396]])

In [80]:
row_means = arr.mean(1)

In [81]:
row_means.reshape((4, 1))

array([[-0.33935166],
       [ 0.96044533],
       [ 0.03538916],
       [-0.79541816]])

In [82]:
demeaned = arr - row_means.reshape((4, 1))

In [83]:
demeaned.mean(1)

array([  0.00000000e+00,   3.70074342e-17,   0.00000000e+00,
         7.40148683e-17])

## 沿其他轴向广播

In [84]:
arr - arr.mean(1).reshape((4, 1))

array([[-0.50773356,  0.15910759,  0.34862597],
       [ 0.01286571,  0.46742323, -0.48028894],
       [-0.42371212,  0.81164437, -0.38793226],
       [ 0.81930823, -0.57448244, -0.24482579]])

In [85]:
arr = np.zeros((4, 4))

In [86]:
arr_3d = arr[:, np.newaxis, :]

In [87]:
arr_3d.shape

(4, 1, 4)

In [88]:
arr_1d = np.random.normal(size = 3)

In [89]:
arr_1d[:, np.newaxis]

array([[-0.08572775],
       [-1.63096188],
       [ 0.11883472]])

In [90]:
arr_1d[np.newaxis, :]

array([[-0.08572775, -1.63096188,  0.11883472]])

In [91]:
arr = randn(3, 4, 5)

In [92]:
depth_mean = arr.mean(2)

In [93]:
depth_mean

array([[ 0.23555739, -0.27574239,  0.61361266, -0.2236085 ],
       [-0.26155157,  0.4323338 , -0.44497441, -0.20652084],
       [-0.03111959,  0.04018112,  1.2941668 , -0.00822348]])

In [94]:
demeaned = arr - depth_mean[:, :, np.newaxis]

In [95]:
demeaned.mean(2)

array([[  0.00000000e+00,   0.00000000e+00,  -1.11022302e-16,
          4.44089210e-17],
       [  1.77635684e-16,   0.00000000e+00,   4.44089210e-17,
         -1.66533454e-17],
       [ -4.44089210e-17,   0.00000000e+00,   4.44089210e-17,
          0.00000000e+00]])

In [96]:
def demean_axis(arr, axis = 0):
    """Subtract the mean taken along ``axis`` from ``arr`` via broadcasting.

    Parameters
    ----------
    arr : numpy.ndarray
        Input array of any dimensionality (ndim >= 1).
    axis : int, default 0
        Axis along which the mean is computed and removed.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``arr`` whose mean along ``axis``
        is zero up to floating-point rounding.
    """
    means = arr.mean(axis)
    # Build an index of full slices with np.newaxis at position `axis`,
    # which re-inserts the reduced axis as a length-1 dimension so that
    # `means` broadcasts against `arr`.
    indexer = [slice(None)] * arr.ndim
    indexer[axis] = np.newaxis
    # The index must be a tuple: indexing with a list of slices/None is
    # rejected by current NumPy releases (it is treated as an array index).
    return arr - means[tuple(indexer)]

## 通过广播设置数组的值

In [97]:
arr = np.zeros((4, 3))

In [98]:
arr[:] = 5

In [99]:
arr

array([[ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.]])

In [100]:
col = np.array([1.28, -.42, .44, 1.6])

In [101]:
arr[:] = col[:, np.newaxis]

In [102]:
arr

array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

In [103]:
arr[:2] = [[-1.37], [.509]]

In [104]:
arr

array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])