In [1]:
import numpy as np

# ndarray对象的内部机理

In [2]:
np.ones((10, 5)).shape

(10, 5)

In [3]:
np.ones((3, 4, 5), dtype = np.float64).strides

(160, 40, 8)

## NumPy数据类型体系

In [4]:
ints = np.ones(10, dtype = np.uint16)

In [5]:
floats = np.ones(10, dtype = np.float32)

In [6]:
np.issubdtype(ints.dtype, np.integer)

True

In [7]:
np.issubdtype(floats.dtype, np.floating)

True

In [8]:
np.float64.mro()

[numpy.float64,
 numpy.floating,
 numpy.inexact,
 numpy.number,
 numpy.generic,
 float,
 object]

# 高级数组操作

## 数组重塑

In [9]:
arr = np.arange(8)

In [10]:
arr

array([0, 1, 2, 3, 4, 5, 6, 7])

In [11]:
arr.reshape((4, 2))

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

In [12]:
arr.reshape((4, 2)).reshape((2, 4))

array([[0, 1, 2, 3],
       [4, 5, 6, 7]])

In [13]:
arr = np.arange(15)

In [14]:
arr.reshape((5, -1))

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [15]:
other_arr = np.ones((3, 5))

In [16]:
other_arr.shape

(3, 5)

In [17]:
arr.reshape(other_arr.shape)

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [18]:
arr = np.arange(15).reshape((5, 3))

In [19]:
arr

array([[ 0,  1,  2],
       [ 3,  4,  5],
       [ 6,  7,  8],
       [ 9, 10, 11],
       [12, 13, 14]])

In [20]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [21]:
arr.flatten()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

## C和Fortran顺序

In [22]:
arr = np.arange(12).reshape((3, 4))

In [23]:
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [24]:
arr.ravel()

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

In [25]:
arr.ravel('F')

array([ 0,  4,  8,  1,  5,  9,  2,  6, 10,  3,  7, 11])

## 数组的合并和拆分

In [26]:
arr1 = np.array([[1, 2, 3], [4, 5, 6]])

In [27]:
arr2 = np.array([[7, 8, 9], [10, 11, 12]])

In [28]:
np.concatenate([arr1, arr2], axis = 0)

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [29]:
np.concatenate([arr1, arr2], axis = 1)

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [30]:
np.vstack((arr1, arr2))

array([[ 1,  2,  3],
       [ 4,  5,  6],
       [ 7,  8,  9],
       [10, 11, 12]])

In [31]:
np.hstack((arr1, arr2))

array([[ 1,  2,  3,  7,  8,  9],
       [ 4,  5,  6, 10, 11, 12]])

In [32]:
from numpy.random import randn

In [33]:
arr = randn(5, 2)

In [34]:
arr

array([[-0.08100025, -0.51548936],
       [-0.05745642,  2.18398566],
       [-0.65335521,  0.75673303],
       [-1.51419849, -1.21754468],
       [ 1.02175543,  2.14571786]])

In [35]:
first, second, third = np.split(arr, [1, 3])

In [36]:
first

array([[-0.08100025, -0.51548936]])

In [37]:
second

array([[-0.05745642,  2.18398566],
       [-0.65335521,  0.75673303]])

In [38]:
third

array([[-1.51419849, -1.21754468],
       [ 1.02175543,  2.14571786]])

### 堆叠辅助类: r_和c_　

In [39]:
arr = np.arange(6)

In [40]:
arr1 = arr.reshape((3, 2))

In [41]:
arr2 = randn(3, 2)

In [42]:
np.r_[arr1, arr2]

array([[ 0.        ,  1.        ],
       [ 2.        ,  3.        ],
       [ 4.        ,  5.        ],
       [-0.93190794, -1.31925677],
       [ 1.30759799, -0.00883668],
       [ 1.13281789, -1.32841209]])

In [43]:
np.c_[np.r_[arr1, arr2], arr]

array([[ 0.        ,  1.        ,  0.        ],
       [ 2.        ,  3.        ,  1.        ],
       [ 4.        ,  5.        ,  2.        ],
       [-0.93190794, -1.31925677,  3.        ],
       [ 1.30759799, -0.00883668,  4.        ],
       [ 1.13281789, -1.32841209,  5.        ]])

In [44]:
np.c_[1: 6, -10: -5]

array([[  1, -10],
       [  2,  -9],
       [  3,  -8],
       [  4,  -7],
       [  5,  -6]])

## 元素的重复操作: tile和repeat

In [45]:
arr = np.arange(3)

In [46]:
arr.repeat(3)

array([0, 0, 0, 1, 1, 1, 2, 2, 2])

In [47]:
arr.repeat([2, 3, 4])

array([0, 0, 1, 1, 1, 2, 2, 2, 2])

In [48]:
arr = randn(2, 2)

In [49]:
arr

array([[-0.76458364, -1.6372881 ],
       [-0.52084446,  1.41984098]])

In [50]:
arr.repeat(2, axis = 0)

array([[-0.76458364, -1.6372881 ],
       [-0.76458364, -1.6372881 ],
       [-0.52084446,  1.41984098],
       [-0.52084446,  1.41984098]])

In [51]:
arr.repeat([2, 3], axis = 0)

array([[-0.76458364, -1.6372881 ],
       [-0.76458364, -1.6372881 ],
       [-0.52084446,  1.41984098],
       [-0.52084446,  1.41984098],
       [-0.52084446,  1.41984098]])

In [52]:
arr.repeat([2, 3], axis = 1)

array([[-0.76458364, -0.76458364, -1.6372881 , -1.6372881 , -1.6372881 ],
       [-0.52084446, -0.52084446,  1.41984098,  1.41984098,  1.41984098]])

In [53]:
arr

array([[-0.76458364, -1.6372881 ],
       [-0.52084446,  1.41984098]])

In [54]:
np.tile(arr, 2)

array([[-0.76458364, -1.6372881 , -0.76458364, -1.6372881 ],
       [-0.52084446,  1.41984098, -0.52084446,  1.41984098]])

In [55]:
arr

array([[-0.76458364, -1.6372881 ],
       [-0.52084446,  1.41984098]])

In [56]:
np.tile(arr, (2, 1))

array([[-0.76458364, -1.6372881 ],
       [-0.52084446,  1.41984098],
       [-0.76458364, -1.6372881 ],
       [-0.52084446,  1.41984098]])

In [57]:
np.tile(arr, (3, 2))

array([[-0.76458364, -1.6372881 , -0.76458364, -1.6372881 ],
       [-0.52084446,  1.41984098, -0.52084446,  1.41984098],
       [-0.76458364, -1.6372881 , -0.76458364, -1.6372881 ],
       [-0.52084446,  1.41984098, -0.52084446,  1.41984098],
       [-0.76458364, -1.6372881 , -0.76458364, -1.6372881 ],
       [-0.52084446,  1.41984098, -0.52084446,  1.41984098]])

## 花式索引的等价函数: take和put

In [58]:
arr = np.arange(10) * 100

In [59]:
inds = [7, 1, 2, 6]

In [60]:
arr[inds]

array([700, 100, 200, 600])

In [61]:
arr.take(inds)

array([700, 100, 200, 600])

In [62]:
arr.put(inds, 42)

In [63]:
arr

array([  0,  42,  42, 300, 400, 500,  42,  42, 800, 900])

In [64]:
arr.put(inds, [40, 41, 42, 43])

In [65]:
arr

array([  0,  41,  42, 300, 400, 500,  43,  40, 800, 900])

In [66]:
inds = [2, 0, 2, 1]

In [67]:
arr = randn(2, 4)

In [68]:
arr

array([[-0.11295883,  0.59393145,  1.17510748,  0.37694287],
       [ 0.54308016,  0.57934738,  1.82437326,  0.63868548]])

In [69]:
arr.take(inds, axis = 1)

array([[ 1.17510748, -0.11295883,  1.17510748,  0.59393145],
       [ 1.82437326,  0.54308016,  1.82437326,  0.57934738]])

In [70]:
# performance issue
arr = randn(1000, 50)

inds = np.random.permutation(1000)[:500]

%timeit arr[inds]

%timeit arr.take(inds, axis = 0)

The slowest run took 6.35 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 21.6 µs per loop
The slowest run took 4.93 times longer than the fastest. This could mean that an intermediate result is being cached.
10000 loops, best of 3: 25.5 µs per loop


# 广播

In [71]:
arr = np.arange(5)

In [72]:
arr

array([0, 1, 2, 3, 4])

In [73]:
arr * 4

array([ 0,  4,  8, 12, 16])

In [74]:
arr = randn(4, 3)

In [75]:
arr.mean(0)

array([ 0.00390383, -0.11915863, -0.14128946])

In [76]:
demeaned = arr - arr.mean(0)

In [77]:
demeaned

array([[ 0.74451874,  0.35225304,  1.39705485],
       [-1.35470171,  0.17276076, -0.30028509],
       [ 1.12942077, -0.43972224, -0.06066863],
       [-0.51923779, -0.08529156, -1.03610113]])

In [78]:
demeaned.mean(0)

array([  0.00000000e+00,   1.38777878e-17,  -5.55111512e-17])

In [79]:
arr

array([[ 0.74842257,  0.23309441,  1.25576538],
       [-1.35079788,  0.05360214, -0.44157455],
       [ 1.1333246 , -0.55888087, -0.2019581 ],
       [-0.51533396, -0.20445019, -1.17739059]])

In [80]:
row_means = arr.mean(1)

In [81]:
row_means.reshape((4, 1))

array([[ 0.74576079],
       [-0.5795901 ],
       [ 0.12416188],
       [-0.63239158]])

In [82]:
demeaned = arr - row_means.reshape((4, 1))

In [83]:
demeaned.mean(1)

array([ -7.40148683e-17,  -5.55111512e-17,  -3.70074342e-17,
         0.00000000e+00])

## 沿其他轴向广播

In [84]:
arr - arr.mean(1).reshape((4, 1))

array([[ 0.00266178, -0.51266638,  0.51000459],
       [-0.77120778,  0.63319224,  0.13801555],
       [ 1.00916272, -0.68304275, -0.32611997],
       [ 0.11705762,  0.42794139, -0.54499901]])

In [85]:
arr = np.zeros((4, 4))

In [86]:
arr_3d = arr[:, np.newaxis, :]

In [87]:
arr_3d.shape

(4, 1, 4)

In [88]:
arr_1d = np.random.normal(size = 3)

In [89]:
arr_1d[:, np.newaxis]

array([[-0.34654027],
       [ 0.53226022],
       [ 0.40446587]])

In [90]:
arr_1d[np.newaxis, :]

array([[-0.34654027,  0.53226022,  0.40446587]])

In [91]:
arr = randn(3, 4, 5)

In [92]:
depth_mean = arr.mean(2)

In [93]:
depth_mean

array([[-0.13363516,  0.62067447,  0.48755314, -0.72370215],
       [ 0.75545872, -0.72430061,  0.53739973,  0.08884483],
       [-0.15376846,  0.14030445, -0.21378735,  0.60686981]])

In [94]:
demeaned = arr - depth_mean[:, :, np.newaxis]

In [95]:
demeaned.mean(2)

array([[  4.44089210e-17,  -4.44089210e-17,   4.44089210e-17,
         -2.22044605e-17],
       [  0.00000000e+00,   4.44089210e-17,  -8.88178420e-17,
          2.77555756e-17],
       [ -1.11022302e-17,   2.22044605e-17,   0.00000000e+00,
         -6.66133815e-17]])

In [96]:
def demean_axis(arr, axis = 0):
    """Subtract the mean taken along ``axis`` from ``arr`` via broadcasting.

    Parameters
    ----------
    arr : ndarray
        Input array; it is not modified.
    axis : int, optional
        Axis along which the mean is computed and removed (default 0).

    Returns
    -------
    ndarray
        A new array with the same shape as ``arr``.
    """
    means = arr.mean(axis)
    # Build an index that keeps every axis but re-inserts a length-1 axis
    # where the reduction happened, so that ``means`` broadcasts against arr.
    indexer = [slice(None)] * arr.ndim
    indexer[axis] = np.newaxis
    # A multi-dimensional index must be a tuple: indexing with a list of
    # slices/None is rejected by current NumPy releases.
    return arr - means[tuple(indexer)]

## 通过广播设置数组的值

In [97]:
arr = np.zeros((4, 3))

In [98]:
arr[:] = 5

In [99]:
arr

array([[ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.],
       [ 5.,  5.,  5.]])

In [100]:
col = np.array([1.28, -.42, .44, 1.6])

In [101]:
arr[:] = col[:, np.newaxis]

In [102]:
arr

array([[ 1.28,  1.28,  1.28],
       [-0.42, -0.42, -0.42],
       [ 0.44,  0.44,  0.44],
       [ 1.6 ,  1.6 ,  1.6 ]])

In [103]:
arr[:2] = [[-1.37], [.509]]

In [104]:
arr

array([[-1.37 , -1.37 , -1.37 ],
       [ 0.509,  0.509,  0.509],
       [ 0.44 ,  0.44 ,  0.44 ],
       [ 1.6  ,  1.6  ,  1.6  ]])

# ufunc高级应用
## ufunc实例方法

In [105]:
arr = np.arange(10)

In [106]:
np.add.reduce(arr)

45

In [107]:
arr.sum()

45

In [108]:
arr = randn(5, 5)

In [109]:
arr[::2].sort(1)

In [110]:
arr[:, :-1] < arr[:, 1:]

array([[ True,  True,  True,  True],
       [False,  True,  True, False],
       [ True,  True,  True,  True],
       [False, False,  True, False],
       [ True,  True,  True,  True]], dtype=bool)

In [111]:
np.logical_and.reduce(arr[:, :-1] < arr[:, 1:], axis = 1)

array([ True, False,  True, False,  True], dtype=bool)

In [112]:
arr = np.arange(15).reshape((3, 5))

In [113]:
np.add.accumulate(arr, axis = 1)

array([[ 0,  1,  3,  6, 10],
       [ 5, 11, 18, 26, 35],
       [10, 21, 33, 46, 60]])

In [114]:
arr = np.arange(3).repeat([1,2,2])

In [115]:
arr

array([0, 1, 1, 2, 2])

In [116]:
np.multiply.outer(arr, np.arange(5))

array([[0, 0, 0, 0, 0],
       [0, 1, 2, 3, 4],
       [0, 1, 2, 3, 4],
       [0, 2, 4, 6, 8],
       [0, 2, 4, 6, 8]])

In [117]:
result = np.subtract.outer(randn(3, 4), randn(5))

In [118]:
result.shape

(3, 4, 5)

In [119]:
arr = np.arange(10)

In [120]:
np.add.reduceat(arr, [0, 5, 8])

array([10, 18, 17])

In [121]:
arr = np.multiply.outer(np.arange(4), np.arange(5))

In [122]:
arr

array([[ 0,  0,  0,  0,  0],
       [ 0,  1,  2,  3,  4],
       [ 0,  2,  4,  6,  8],
       [ 0,  3,  6,  9, 12]])

In [123]:
np.add.reduceat(arr, [0, 2, 4], axis = 1)

array([[ 0,  0,  0],
       [ 1,  5,  4],
       [ 2, 10,  8],
       [ 3, 15, 12]])

## 自定义ufunc

In [124]:
def add_elements(x, y):
    """Return ``x + y``; the scalar kernel wrapped into a ufunc below."""
    total = x + y
    return total

In [125]:
add_them = np.frompyfunc(add_elements, 2, 1)

In [126]:
add_them(np.arange(8), np.arange(8))

array([0, 2, 4, 6, 8, 10, 12, 14], dtype=object)

In [127]:
add_them = np.vectorize(add_elements, otypes = [np.float64])

In [128]:
add_them(np.arange(8), np.arange(8))

array([  0.,   2.,   4.,   6.,   8.,  10.,  12.,  14.])

In [129]:
arr = randn(10000)

In [130]:
%timeit add_them(arr, arr)

1000 loops, best of 3: 1.6 ms per loop


In [131]:
%timeit np.add(arr, arr)

The slowest run took 5.94 times longer than the fastest. This could mean that an intermediate result is being cached.
100000 loops, best of 3: 4.7 µs per loop


# 结构化和记录式数组

In [132]:
dtype = [('X', np.float64), ('Y', np.int32)]

In [133]:
sarr = np.array([(1.5, 6), (np.pi, -2)], dtype = dtype)

In [134]:
sarr

array([(1.5, 6), (3.141592653589793, -2)], 
      dtype=[('X', '<f8'), ('Y', '<i4')])

In [135]:
sarr[0]

(1.5, 6)

In [136]:
sarr[0]['X']

1.5

In [137]:
sarr['X']

array([ 1.5       ,  3.14159265])

## 嵌套dtype和多维字段

In [138]:
dtype = [('X', np.float64, 3), ('Y', np.int32)]

In [139]:
arr = np.zeros(4, dtype=dtype)

In [140]:
arr

array([([0.0, 0.0, 0.0], 0), ([0.0, 0.0, 0.0], 0), ([0.0, 0.0, 0.0], 0),
       ([0.0, 0.0, 0.0], 0)], 
      dtype=[('X', '<f8', (3,)), ('Y', '<i4')])

In [141]:
arr[0]['X']

array([ 0.,  0.,  0.])

In [142]:
arr['X']

array([[ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.],
       [ 0.,  0.,  0.]])

In [143]:
dtype = [('X', [('a', 'f8'), ('b', 'f4')]), ('Y', np.int32)]

In [144]:
data = np.array([((1, 2), 5), ((3, 4), 6)], dtype=dtype)

In [145]:
data['X']

array([(1.0, 2.0), (3.0, 4.0)], 
      dtype=[('a', '<f8'), ('b', '<f4')])

In [146]:
data['Y']

array([5, 6], dtype=int32)

In [147]:
data['X']['a']

array([ 1.,  3.])

# 更多有关排序的话题

In [148]:
arr = randn(6)

In [149]:
arr.sort()

In [150]:
arr

array([-0.94927652, -0.8225558 , -0.31542587,  0.0707895 ,  0.3321788 ,
        1.39066279])

In [151]:
arr = randn(3, 5)

In [152]:
arr

array([[-1.84156884, -2.19863209,  1.31770235, -0.31937697,  0.58058642],
       [ 0.55028772, -1.28501685,  0.0580552 , -0.91911894, -0.72310615],
       [-0.12850419,  0.37884159, -0.82821607, -0.14751682,  1.27855828]])

In [153]:
arr[:, 0].sort()

In [154]:
arr

array([[-1.84156884, -2.19863209,  1.31770235, -0.31937697,  0.58058642],
       [-0.12850419, -1.28501685,  0.0580552 , -0.91911894, -0.72310615],
       [ 0.55028772,  0.37884159, -0.82821607, -0.14751682,  1.27855828]])

In [155]:
arr = randn(5)

In [156]:
arr

array([ 1.6425305 , -0.04907801,  0.25818282, -0.71636701, -0.22038554])

In [157]:
np.sort(arr)

array([-0.71636701, -0.22038554, -0.04907801,  0.25818282,  1.6425305 ])

In [158]:
arr

array([ 1.6425305 , -0.04907801,  0.25818282, -0.71636701, -0.22038554])

In [159]:
arr = randn(3, 5)

In [160]:
arr

array([[ 1.05908647, -0.17037686, -0.17965148,  0.47477048,  1.43153104],
       [-1.32536879, -1.00714865,  1.29628674,  0.25644544,  0.21723046],
       [ 0.22980771,  1.08888909,  0.64554904,  0.09419733, -2.0820247 ]])

In [161]:
arr.sort(axis = 1)

In [162]:
arr

array([[-0.17965148, -0.17037686,  0.47477048,  1.05908647,  1.43153104],
       [-1.32536879, -1.00714865,  0.21723046,  0.25644544,  1.29628674],
       [-2.0820247 ,  0.09419733,  0.22980771,  0.64554904,  1.08888909]])

In [163]:
arr[:, ::-1]

array([[ 1.43153104,  1.05908647,  0.47477048, -0.17037686, -0.17965148],
       [ 1.29628674,  0.25644544,  0.21723046, -1.00714865, -1.32536879],
       [ 1.08888909,  0.64554904,  0.22980771,  0.09419733, -2.0820247 ]])

## 间接排序: argsort和lexsort

In [164]:
values = np.array([5, 0, 1, 3, 2])

In [165]:
indexer = values.argsort()

In [166]:
indexer

array([1, 2, 4, 3, 0])

In [167]:
values[indexer]

array([0, 1, 2, 3, 5])

In [168]:
arr = randn(3, 5)

In [169]:
arr[0] = values

In [170]:
arr[:, arr[0].argsort()]

array([[ 0.        ,  1.        ,  2.        ,  3.        ,  5.        ],
       [-0.05147713,  0.29753637,  0.05343106,  0.80959092, -0.15062778],
       [-1.81222691,  1.40098024, -0.3229831 ,  1.0361198 ,  0.13064349]])

In [171]:
first_name = np.array(['Bob', 'Jane', 'Steve', 'Bill', 'Barbara'])

In [172]:
last_name = np.array(['Jones', 'Arnold', 'Arnold', 'Jones', 'Walters'])

In [173]:
sorter = np.lexsort((first_name, last_name))

In [174]:
# zip() is lazy on Python 3; materialize it so the cell displays the pairs.
list(zip(last_name[sorter], first_name[sorter]))

[('Arnold', 'Jane'),
 ('Arnold', 'Steve'),
 ('Jones', 'Bill'),
 ('Jones', 'Bob'),
 ('Walters', 'Barbara')]

## 其他排序算法

In [175]:
values = np.array(['2:first', '2:second', '1:first', '1:second', '1:third'])

In [176]:
key = np.array([2, 2, 1, 1, 1])

In [177]:
indexer = key.argsort(kind = 'mergesort')

In [178]:
indexer

array([2, 3, 4, 0, 1])

In [179]:
values.take(indexer)

array(['1:first', '1:second', '1:third', '2:first', '2:second'], 
      dtype='|S8')

## numpy.searchsorted: 在有序数组中查找元素

In [180]:
arr = np.array([0, 1, 7, 12, 15])

In [181]:
arr.searchsorted(9)

3

In [182]:
arr.searchsorted([0, 8, 11, 16])

array([0, 3, 3, 5])

In [183]:
arr = np.array([0, 0, 0, 1, 1, 1, 1])

In [184]:
arr.searchsorted([0, 1])

array([0, 3])

In [185]:
arr.searchsorted([0, 1], side = 'right')

array([3, 7])

In [186]:
data = np.floor(np.random.uniform(0, 10000, size = 50))

In [187]:
bins = np.array([0, 100, 1000, 5000, 10000])

In [188]:
data

array([ 6658.,  5525.,  7879.,  6713.,  8999.,  9679.,  6765.,  4648.,
        6393.,   742.,  5381.,  1115.,   687.,  4003.,  5123.,  7467.,
        2630.,  1094.,  3898.,   323.,  4746.,  7535.,  6503.,  7159.,
        8367.,  5449.,  1798.,  1925.,  8133.,  5468.,  6555.,  2674.,
        2519.,  2756.,   451.,  7341.,  1462.,  3296.,  7376.,  2228.,
        7124.,  5081.,  8959.,  2917.,  6399.,  3199.,  9566.,  9429.,
        8985.,  1392.])

In [189]:
labels = bins.searchsorted(data)

In [190]:
labels

array([4, 4, 4, 4, 4, 4, 4, 3, 4, 2, 4, 3, 2, 3, 4, 4, 3, 3, 3, 2, 3, 4, 4,
       4, 4, 4, 3, 3, 4, 4, 4, 3, 3, 3, 2, 4, 3, 3, 4, 3, 4, 4, 4, 3, 4, 3,
       4, 4, 4, 3])

In [191]:
import pandas as pd

In [192]:
pd.Series(data).groupby(labels).mean()

2     550.750000
3    2683.333333
4    7214.678571
dtype: float64

In [193]:
np.digitize(data, bins)

array([4, 4, 4, 4, 4, 4, 4, 3, 4, 2, 4, 3, 2, 3, 4, 4, 3, 3, 3, 2, 3, 4, 4,
       4, 4, 4, 3, 3, 4, 4, 4, 3, 3, 3, 2, 4, 3, 3, 4, 3, 4, 4, 4, 3, 4, 3,
       4, 4, 4, 3])

# NumPy的matrix类

In [194]:
X =  np.array([[ 8.82768214,  3.82222409, -1.14276475,  2.04411587],
               [ 3.82222409,  6.75272284,  0.83909108,  2.08293758],
               [-1.14276475,  0.83909108,  5.01690521,  0.79573241],
               [ 2.04411587,  2.08293758,  0.79573241,  6.24095859]])

In [195]:
X[:, 0]

array([ 8.82768214,  3.82222409, -1.14276475,  2.04411587])

In [196]:
y = X[:, :1]

In [197]:
X

array([[ 8.82768214,  3.82222409, -1.14276475,  2.04411587],
       [ 3.82222409,  6.75272284,  0.83909108,  2.08293758],
       [-1.14276475,  0.83909108,  5.01690521,  0.79573241],
       [ 2.04411587,  2.08293758,  0.79573241,  6.24095859]])

In [198]:
y

array([[ 8.82768214],
       [ 3.82222409],
       [-1.14276475],
       [ 2.04411587]])

In [199]:
np.dot(y.T, np.dot(X, y))

array([[ 1195.46796121]])

In [200]:
Xm = np.matrix(X)

In [201]:
ym = Xm[:, 0]

In [202]:
Xm

matrix([[ 8.82768214,  3.82222409, -1.14276475,  2.04411587],
        [ 3.82222409,  6.75272284,  0.83909108,  2.08293758],
        [-1.14276475,  0.83909108,  5.01690521,  0.79573241],
        [ 2.04411587,  2.08293758,  0.79573241,  6.24095859]])

In [203]:
ym

matrix([[ 8.82768214],
        [ 3.82222409],
        [-1.14276475],
        [ 2.04411587]])

In [204]:
ym.T * Xm * ym

matrix([[ 1195.46796121]])

In [205]:
Xm.I * X

matrix([[  1.00000000e+00,   8.32667268e-17,  -5.89805982e-17,
           8.32667268e-17],
        [ -6.93889390e-17,   1.00000000e+00,   6.93889390e-18,
          -8.32667268e-17],
        [  1.11022302e-16,   1.38777878e-17,   1.00000000e+00,
           0.00000000e+00],
        [  5.55111512e-17,   1.11022302e-16,   0.00000000e+00,
           1.00000000e+00]])

# 高级数组输入输出
## 内存映像文件

In [206]:
mmap = np.memmap('mymmap', dtype = 'float64', mode = 'w+', shape = (10000, 10000))

In [207]:
mmap

memmap([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       ..., 
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [208]:
section = mmap[:5]

In [209]:
section

memmap([[ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ...,  0.,  0.,  0.]])

In [210]:
section[:] = np.random.randn(5, 10000)

In [211]:
mmap.flush()

In [212]:
mmap

memmap([[-0.01493482,  0.11314984, -0.52913244, ..., -0.63386752,
        -0.31628287, -1.35588483],
       [ 0.77668056,  1.36957004, -0.10856032, ..., -0.91688729,
         0.65350586,  2.06799642],
       [ 0.67558139, -0.00344003, -1.14905612, ..., -0.46009178,
        -0.09079171,  0.87712429],
       ..., 
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])

In [213]:
del mmap

In [214]:
mmap = np.memmap('mymmap', dtype = 'float64', shape = (10000, 10000))

In [215]:
mmap

memmap([[-0.01493482,  0.11314984, -0.52913244, ..., -0.63386752,
        -0.31628287, -1.35588483],
       [ 0.77668056,  1.36957004, -0.10856032, ..., -0.91688729,
         0.65350586,  2.06799642],
       [ 0.67558139, -0.00344003, -1.14905612, ..., -0.46009178,
        -0.09079171,  0.87712429],
       ..., 
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ],
       [ 0.        ,  0.        ,  0.        , ...,  0.        ,
         0.        ,  0.        ]])

# 性能建议
## 连续内存的重要性

In [216]:
arr_c = np.ones((1000, 1000), order='C')
arr_f = np.ones((1000, 1000), order='F')

In [217]:
arr_c.flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [218]:
arr_f.flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : True
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [219]:
arr_f.flags.f_contiguous

True

In [220]:
%timeit arr_c.sum(1)

1000 loops, best of 3: 533 µs per loop


In [221]:
%timeit arr_f.sum(1)

1000 loops, best of 3: 567 µs per loop


In [222]:
arr_f.copy('C').flags

  C_CONTIGUOUS : True
  F_CONTIGUOUS : False
  OWNDATA : True
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

In [223]:
arr_c[:50].flags.contiguous

True

In [224]:
arr_c[:, :50].flags

  C_CONTIGUOUS : False
  F_CONTIGUOUS : False
  OWNDATA : False
  WRITEABLE : True
  ALIGNED : True
  UPDATEIFCOPY : False

# 其他加速手段: Cython

```cython
from numpy cimport ndarray, float64_t

def sum_elements(ndarray[float64_t] arr):
    """Sum the elements of a float64 array with an explicit typed loop."""
    # Typed accumulator and index let Cython compile the loop to plain C.
    cdef float64_t total = 0
    cdef Py_ssize_t length = len(arr)
    cdef Py_ssize_t idx

    for idx in range(length):
        total += arr[idx]

    return total
```