## ``NumPy`` 中的聚合操作

### sum

In [57]:
import numpy as np

# Draw 100 samples from the uniform distribution on [0, 1).
L = np.random.random(100)
# Python's built-in sum walks the array one element at a time.
sum(L)

49.211528214179019

In [58]:
# Same total up to floating-point rounding; the trailing digits can differ
# from the built-in sum because the additions are grouped differently.
np.sum(L)

49.211528214178998

In [59]:
# One million uniform samples make the speed gap between the two sums measurable.
big_array = np.random.rand(1000000)
# Built-in sum: a Python-level loop over every element.
%timeit sum(big_array)
# np.sum: the reduction runs inside NumPy's compiled code.
%timeit np.sum(big_array)

10 loops, best of 3: 119 ms per loop
1000 loops, best of 3: 646 µs per loop


### min, max

In [60]:
np.min(big_array)

2.1907583922509133e-06

In [61]:
np.max(big_array)

0.99999960745679706

In [62]:
# Method form of np.min(big_array); it yields the same value as the function call.
big_array.min()

2.1907583922509133e-06

In [63]:
big_array.max()

0.99999960745679706

In [64]:
big_array.sum()

499684.3731243927

In [65]:
# -1 asks NumPy to infer that dimension: 16 elements over 4 rows gives 4 columns.
X = np.arange(16).reshape(4,-1)
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [66]:
# Function form of reshape; here -1 is the inferred row count (16 / 4 = 4),
# so X is rebound to an array equal to the previous one.
X = np.reshape(np.arange(16), (-1,4))
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

### 多维度聚合

In [67]:
# With no axis argument the sum runs over every element of the array.
X.sum()

120

In [68]:
# axis=0 collapses the row dimension, leaving one sum per column.
X.sum(axis=0)

array([24, 28, 32, 36])

In [69]:
# axis=1 collapses the column dimension, leaving one sum per row.
X.sum(axis=1)

array([ 6, 22, 38, 54])

注意：`axis` 描述的是将要被压缩（聚合掉）的维度。`axis=0` 压缩行这一维度，得到每一列的和；`axis=1` 压缩列这一维度，得到每一行的和。

### 其他聚合操作

In [70]:
# The product of all elements is 0 because X contains the element 0.
X.prod()

0

In [71]:
# Shift every element by 1 so no factor is zero; the product of 1..16 is 16!.
(X+1).prod()

20922789888000

In [72]:
X.mean()

7.5

In [73]:
np.median(X)

7.5

In [74]:
# A single large value (10) pulls the mean above four of the five elements.
v = np.array([1, 1, 2, 2, 10])
np.mean(v)

3.2000000000000002

In [75]:
# The median is the middle value of the sorted data, so the outlier does not move it.
np.median(v)

2.0

In [76]:
# The 50th percentile is the median.
np.percentile(big_array, q=50)

0.49957917874321717

In [77]:
np.median(big_array)

0.49957917874321717

In [78]:
# The 100th percentile is the maximum.
np.percentile(big_array, q=100)

0.99999960745679706

In [79]:
np.max(big_array)

0.99999960745679706

In [80]:
# Minimum, quartiles and maximum of the sample: the 0th, 25th, 50th, 75th
# and 100th percentiles, printed one per line.
for percent in [0, 25, 50, 75, 100]:
    print(np.percentile(big_array, q=percent))

2.19075839225e-06
0.249738102259
0.499579178743
0.749576232137
0.999999607457


In [81]:
# Population variance: np.var divides by N unless ddof is given.
np.var(big_array)

0.083300572891263608

In [82]:
# Standard deviation: the square root of the variance.
np.std(big_array)

0.28861838626682051

In [83]:
# One million samples from a normal distribution with mean 0 and standard deviation 1.
x = np.random.normal(0, 1, 1000000)

In [84]:
np.mean(x)

0.00030944910364100127

In [85]:
np.std(x)

0.9992859099726551

In [86]:
# argmin returns the position of the smallest element, not its value.
np.argmin(x)

319231

In [87]:
# Index with the argmin result itself rather than a hard-coded position:
# x is drawn without a fixed seed, so a literal index from an earlier run
# no longer points at the minimum.
x[np.argmin(x)]

-4.6247836025685105

In [88]:
x.min()

-4.6247836025685105

In [89]:
# argmax returns the position of the largest element, not its value.
np.argmax(x)

683838

### 排序和使用索引

In [90]:
# Rebind x (previously the normal samples) to the integers 0 through 15.
x = np.arange(16)
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [91]:
# shuffle permutes x in place and returns None, so x is displayed on its own line.
np.random.shuffle(x)
x

array([ 1,  5, 10, 15,  2, 12,  7,  0,  9,  4,  3, 14, 13, 11,  8,  6])

In [92]:
# np.sort returns a sorted copy; x itself keeps its shuffled order.
np.sort(x)

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [93]:
x

array([ 1,  5, 10, 15,  2, 12,  7,  0,  9,  4,  3, 14, 13, 11,  8,  6])

In [94]:
# ndarray.sort sorts x in place and returns None, hence no output for this cell.
x.sort()

In [95]:
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [96]:
X

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15]])

In [97]:
# Rebind X to a 4x4 array of random integers drawn from 0 through 9.
X = np.random.randint(10, size=(4,4))
X

array([[1, 2, 9, 5],
       [9, 6, 4, 1],
       [8, 4, 1, 2],
       [6, 3, 1, 0]])

In [98]:
# axis=0 sorts along the row dimension, i.e. each column is sorted independently.
np.sort(X, axis=0)

array([[1, 2, 1, 0],
       [6, 3, 1, 1],
       [8, 4, 4, 2],
       [9, 6, 9, 5]])

In [99]:
# axis=1 sorts along the column dimension, i.e. each row is sorted independently.
np.sort(X, axis=1)

array([[1, 2, 5, 9],
       [1, 4, 6, 9],
       [1, 2, 4, 8],
       [0, 1, 3, 6]])

#### 使用索引

In [100]:
x

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15])

In [101]:
np.random.shuffle(x)

In [102]:
x

array([ 7, 15, 13,  1, 11,  8, 10,  5,  4, 14,  3,  0,  9,  2, 12,  6])

In [103]:
# argsort returns the indices that would sort x: the first entry is the
# position of the smallest value, the second that of the next smallest, and so on.
np.argsort(x)

array([11,  3, 13, 10,  8,  7, 15,  0,  5, 12,  6,  4, 14,  2,  9,  1])

In [104]:
# kth=3: the value that a full sort would put at index 3 is placed there;
# smaller values come before it and larger ones after, each side in no guaranteed order.
np.partition(x, 3)

array([ 1,  2,  0,  3,  5,  4,  6, 15, 10, 14,  8, 11,  9, 13, 12,  7])

In [105]:
# Index version of partition: returns positions in x rather than the values themselves.
np.argpartition(x, 3)

array([ 3, 13, 11, 10,  7,  8, 15,  1,  6,  9,  5,  4, 12,  2, 14,  0])

In [106]:
X

array([[1, 2, 9, 5],
       [9, 6, 4, 1],
       [8, 4, 1, 2],
       [6, 3, 1, 0]])

In [110]:
# Per-row argsort: each output row holds the column indices that sort that row of X.
np.argsort(X, axis=1)

array([[0, 1, 3, 2],
       [3, 2, 1, 0],
       [2, 3, 1, 0],
       [3, 2, 1, 0]])

In [111]:
# Per-row argpartition with kth=2: in each output row, the entry at position 2
# is the column index of that row's third-smallest value.
np.argpartition(X, 2, axis=1)

array([[0, 1, 3, 2],
       [3, 2, 1, 0],
       [2, 3, 1, 0],
       [3, 2, 1, 0]])