In [2]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd


# Table of Contents
1. [np.array](#np.array)

    [shape](#shape)
    
    [dtype](#dtype)
    
2. [Indexing and Slicing](#Indexing)

    [Boolean indexing](#boolean)
3. [Transpose](#Transpose)

    [3d array transpose](#3dArray)
    
4. [some universal functions](#universal)
 
     [np.modf](#np.modf)
     
     [np.isnan](#isnan)
5. [data processing](#data_processing)

    [np.meshgrid](#np.meshgrid)
    
    [np.where](#np.where)
    
6. [Statistical methods](#Statistical_methods)
 
7. [Sorting](#Sorting)

8. [Unique and other set logic](#np.unique)

9. [np.in1d](#np.in1d)

10. [other](#other_functions)


1. Numpy
====
ndarray, a fast and space-efficient multidimensional array providing vectorized arithmetic operations and sophisticated broadcasting capabilities

## np.array

In [3]:
data = np.array([[1,2,3],[2,3,4]])

In [4]:
data

array([[1, 2, 3],
       [2, 3, 4]])

## shape
===

In [5]:
data.shape

(2, 3)

## dtype
数据的数据类型（所有数据是homogeneous同质的）：dtype
=====

In [6]:
data.dtype

dtype('int64')

数据的dimension: ndim
===

In [7]:
data.ndim

2

Create using zeros and ones
======
创建高维数组时，shape要用tuple传入。

In [8]:
np.zeros(10)

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [9]:
np.ones((3,10))

array([[1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.],
       [1., 1., 1., 1., 1., 1., 1., 1., 1., 1.]])

np.empty只分配内存而不做初始化，数组里的值是内存中残留的任意内容（并不是随机数，也不保证为0）。

In [10]:
np.empty((2,3,2))

array([[[7.90505033e-323, 0.00000000e+000],
        [2.12199579e-314, 4.82337433e+228],
        [5.82381892e-144, 1.16097020e-028]],

       [[9.15566166e-072, 6.36885655e-062],
        [4.90937993e-062, 3.97062373e+246],
        [1.16318408e-028, 1.26752734e-071]]])

In [11]:
np.ones((2,3,4))

array([[[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]],

       [[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]])

生成连续数字。和range很像，但是生成的是ndarray而不是list.

In [12]:
np.arange(-3,12)

array([-3, -2, -1,  0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11])

ones_like takes another array and produces a ones array of the same shape and dtype.

In [13]:
np.ones_like(np.ones((2,3,4)))

array([[[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]],

       [[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]]])

线性代数中常用的identity matrix（单位矩阵）：np.eye 和 np.identity

In [14]:
np.eye(3,3)

array([[1., 0., 0.],
       [0., 1., 0.],
       [0., 0., 1.]])

In [15]:
np.identity(4)

array([[1., 0., 0., 0.],
       [0., 1., 0., 0.],
       [0., 0., 1., 0.],
       [0., 0., 0., 1.]])

控制data type

In [16]:
arr1 = np.array([1,2,3],dtype = np.int32)

In [17]:
arr1.dtype

dtype('int32')

In [18]:
# Explicit type conversion: astype returns a new array with the requested dtype.
float_arr = arr1.astype(np.float64)
float_arr.dtype

dtype('float64')

In [19]:
arr1 ** 5

array([  1,  32, 243], dtype=int32)

In [20]:
1/arr1

array([1.        , 0.5       , 0.33333333])

In [21]:
arr1 = np.arange(-2,4)
arr1

array([-2, -1,  0,  1,  2,  3])

## Indexing
and slicing
===

In [22]:
arr1[1:3]

array([-1,  0])

In [23]:
arr1[3:5] = 0

In [24]:
arr1

array([-2, -1,  0,  0,  0,  3])

需要注意的是，切片是原数组的视图（view），并不是复制，因此修改切片，等同于在修改原来的数据。如果需要独立的副本，请使用 arr1[1:3].copy()。
====

In [25]:
arr_slice = arr1[1:3]

In [26]:
arr_slice[1] = 12345

In [27]:
arr1

array([   -2,    -1, 12345,     0,     0,     3])

In [28]:
arr_slice

array([   -1, 12345])

In [29]:
arr_slice[:] = -1

In [30]:
arr1

array([-2, -1, -1,  0,  0,  3])

In [31]:
arr2d = np.array([[1,2,3],[2,3,4],[5,6,7]])

In [32]:
arr2d[2]

array([5, 6, 7])

跟MATLAB一样，前面是行索引，后面是列索引（不过NumPy的索引从0开始）。arr2d[2][1] 也可以写成 arr2d[2, 1]。

In [33]:
arr2d[2][1]

6

In [34]:
arr3d = np.array([[[1, 2, 3], [4, 5, 6]], [[7, 8, 9], [10, 11, 12]]])

In [35]:
arr3d

array([[[ 1,  2,  3],
        [ 4,  5,  6]],

       [[ 7,  8,  9],
        [10, 11, 12]]])

下面的方法相当于是访问坐标开头是1，0的点。如果看做三维坐标的话。

In [36]:
arr3d[1,0]

array([7, 8, 9])

In [37]:
arr3d[1,0,1]

8

In [38]:
arr2d[:2,1:]

array([[2, 3],
       [3, 4]])

## boolean 
indexing
===

In [39]:
data = np.random.randn(7,4)

In [40]:
data

array([[-1.59677237,  0.08744915,  1.83955432, -0.64842387],
       [-0.70055019,  1.23655734,  0.79043262, -1.30246783],
       [ 0.43092198, -1.64814159,  0.58942781, -1.56558513],
       [-1.96841216, -0.0583908 ,  0.60978593, -0.91911435],
       [ 1.37718635,  1.83322743, -1.40671968, -0.09450981],
       [ 0.32503326, -0.40117466,  0.57301664,  1.88857564],
       [-0.052564  , -1.12924891,  0.69606116, -1.75057112]])

In [41]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])

In [42]:
names == 'Bob'

array([ True, False, False,  True, False, False, False])

In [43]:
data[names == 'Bob']

array([[-1.59677237,  0.08744915,  1.83955432, -0.64842387],
       [-1.96841216, -0.0583908 ,  0.60978593, -0.91911435]])

In [44]:
data[names == 'Bob', 2:]

array([[ 1.83955432, -0.64842387],
       [ 0.60978593, -0.91911435]])

In [45]:
data[(names != 'Bob')]

array([[-0.70055019,  1.23655734,  0.79043262, -1.30246783],
       [ 0.43092198, -1.64814159,  0.58942781, -1.56558513],
       [ 1.37718635,  1.83322743, -1.40671968, -0.09450981],
       [ 0.32503326, -0.40117466,  0.57301664,  1.88857564],
       [-0.052564  , -1.12924891,  0.69606116, -1.75057112]])

In [46]:
mask= (names == 'Bob') | (names == 'Will')
data[mask]

array([[-1.59677237,  0.08744915,  1.83955432, -0.64842387],
       [ 0.43092198, -1.64814159,  0.58942781, -1.56558513],
       [-1.96841216, -0.0583908 ,  0.60978593, -0.91911435],
       [ 1.37718635,  1.83322743, -1.40671968, -0.09450981]])

In [47]:
data[data < 0] = 0

In [48]:
data

array([[0.        , 0.08744915, 1.83955432, 0.        ],
       [0.        , 1.23655734, 0.79043262, 0.        ],
       [0.43092198, 0.        , 0.58942781, 0.        ],
       [0.        , 0.        , 0.60978593, 0.        ],
       [1.37718635, 1.83322743, 0.        , 0.        ],
       [0.32503326, 0.        , 0.57301664, 1.88857564],
       [0.        , 0.        , 0.69606116, 0.        ]])

In [49]:
data[names != 'Joe'] = 9

In [50]:
data

array([[9.        , 9.        , 9.        , 9.        ],
       [0.        , 1.23655734, 0.79043262, 0.        ],
       [9.        , 9.        , 9.        , 9.        ],
       [9.        , 9.        , 9.        , 9.        ],
       [9.        , 9.        , 9.        , 9.        ],
       [0.32503326, 0.        , 0.57301664, 1.88857564],
       [0.        , 0.        , 0.69606116, 0.        ]])

data[]里面可以传入一个整数list或array（fancy indexing），按给定的顺序选出对应的行。索引正负都可以，负数表示从末尾开始数，就跟list一样。

In [51]:
data[[1,3,4]]

array([[0.        , 1.23655734, 0.79043262, 0.        ],
       [9.        , 9.        , 9.        , 9.        ],
       [9.        , 9.        , 9.        , 9.        ]])

In [52]:
data[[-7,-1]]

array([[9.        , 9.        , 9.        , 9.        ],
       [0.        , 0.        , 0.69606116, 0.        ]])

In [53]:
arr = np.arange(32).reshape((8,4))
arr

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23],
       [24, 25, 26, 27],
       [28, 29, 30, 31]])

输入两个array，就像是一组组坐标。

In [54]:
arr[[1,2,3],[3,0,1]]

array([ 7,  8, 13])

只传入一个整数array时，是按给定的顺序选出对应的行。在结果后面再接一次索引，就可以继续调整顺序：`[:, [0,3,1,2]]` 重新排列列，`[[0,3,1,2], :]` 重新排列行。既可以给行改，又可以给列改！

In [55]:
arr[[1,5,7,2]]

array([[ 4,  5,  6,  7],
       [20, 21, 22, 23],
       [28, 29, 30, 31],
       [ 8,  9, 10, 11]])

In [56]:
arr[[1,5,7,2]][:,[0,3,1,2]]

array([[ 4,  7,  5,  6],
       [20, 23, 21, 22],
       [28, 31, 29, 30],
       [ 8, 11,  9, 10]])

In [57]:
arr[[1,5,7,2]][[0,3,1,2],:]

array([[ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [20, 21, 22, 23],
       [28, 29, 30, 31]])

**np.ix_** 可以把两个一维索引数组组合起来，同时选择行和列。下面的例子选出了第1，5，7，2行，并把列按1，0，3，2的顺序重新排列。

In [58]:
arr[np.ix_([1,5,7,2],[1,0,3,2])]

array([[ 5,  4,  7,  6],
       [21, 20, 23, 22],
       [29, 28, 31, 30],
       [ 9,  8, 11, 10]])

<A NAME="transpose">Transposing Arrays and Swapping Axes</a>
===

In [59]:
arr = np.arange(15).reshape((3,5))

In [60]:
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

## Transpose
使用arr.T 或者np.transpose(arr)

In [61]:
arr.T

array([[ 0,  5, 10],
       [ 1,  6, 11],
       [ 2,  7, 12],
       [ 3,  8, 13],
       [ 4,  9, 14]])

In [62]:
# np.transpose(arr) returns the transposed array (same values as arr.T) without
# modifying arr; its result is discarded here, so the cell displays the unchanged arr.
np.transpose(arr)
arr

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

np.dot
====

In [63]:
arr = np.random.randn(6,3)

In [64]:
np.dot(arr.T,arr)

array([[ 3.92881695, -0.69878605, -0.24487676],
       [-0.69878605,  4.39598987,  3.42091076],
       [-0.24487676,  3.42091076,  8.55737901]])

In [65]:
arr = np.arange(16).reshape((2,2,4))
arr

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [66]:
arr.T

array([[[ 0,  8],
        [ 4, 12]],

       [[ 1,  9],
        [ 5, 13]],

       [[ 2, 10],
        [ 6, 14]],

       [[ 3, 11],
        [ 7, 15]]])

In [67]:
arr.transpose((2,0,1))

array([[[ 0,  4],
        [ 8, 12]],

       [[ 1,  5],
        [ 9, 13]],

       [[ 2,  6],
        [10, 14]],

       [[ 3,  7],
        [11, 15]]])

## 3dArray
三维矩阵的转置！！！！！！
====

In [68]:
arr2 = np.arange(16).reshape(2,2,4)           
arr2

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7]],

       [[ 8,  9, 10, 11],
        [12, 13, 14, 15]]])

In [69]:
arr2_t = np.transpose(arr2)
arr2_t

array([[[ 0,  8],
        [ 4, 12]],

       [[ 1,  9],
        [ 5, 13]],

       [[ 2, 10],
        [ 6, 14]],

       [[ 3, 11],
        [ 7, 15]]])

In [70]:
# Locate the value 8 before transposing: its index is (1, 0, 0).
arr2[1,0,0]

8

In [71]:
# After the transpose the same value sits at (0, 0, 1): the first and last indices
# traded places (axis 0 <-> axis 2) while the middle one stayed put, which is the
# axis order (2, 1, 0) passed to transpose in the next cell.
arr2_t[0,0,1]

8

In [72]:
arr2_tt = arr2.transpose((2,1,0))
arr2_tt

array([[[ 0,  8],
        [ 4, 12]],

       [[ 1,  9],
        [ 5, 13]],

       [[ 2, 10],
        [ 6, 14]],

       [[ 3, 11],
        [ 7, 15]]])

因此，我们知道了，三维矩阵的转置就是np.transpose(x, (2,1,0))，也就是把第0轴和第2轴交换（第1轴不变）。我们可以从坐标的变化中看出来：8的坐标从(1,0,0)变成了(0,0,1)！所以transpose函数是根据给定的轴顺序去转换的。比如原来的2×2×4的矩阵，三个轴分别表示为（0，1，2），转置时传入的元组表示新数组的每个轴分别对应原数组的哪个轴，这样重排后的结果，就是我们转置了的矩阵啦！！再让我们看一个例子，一个2×4×4的矩阵，换成（1，0，2）
===

In [73]:
arr4 = np.arange(32).reshape(2,4,4)
arr4

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15]],

       [[16, 17, 18, 19],
        [20, 21, 22, 23],
        [24, 25, 26, 27],
        [28, 29, 30, 31]]])

In [74]:
arr4.transpose(1,0,2)

array([[[ 0,  1,  2,  3],
        [16, 17, 18, 19]],

       [[ 4,  5,  6,  7],
        [20, 21, 22, 23]],

       [[ 8,  9, 10, 11],
        [24, 25, 26, 27]],

       [[12, 13, 14, 15],
        [28, 29, 30, 31]]])

就是把第0轴和第1轴的坐标换了一下啊（第2轴不变）！！例如还是看8：坐标从(0,2,0)变成了(2,0,0)。

In [75]:
arr4[0,2,0]

8

In [76]:
arr4.transpose(1,0,2)[2,0,0]

8

we can prove this by using swapaxes function.
===

In [77]:
arr4.swapaxes(1,0)

array([[[ 0,  1,  2,  3],
        [16, 17, 18, 19]],

       [[ 4,  5,  6,  7],
        [20, 21, 22, 23]],

       [[ 8,  9, 10, 11],
        [24, 25, 26, 27]],

       [[12, 13, 14, 15],
        [28, 29, 30, 31]]])

成功啦！！就是换axis!!! We can verify it with swapaxes: arr4.swapaxes(1,0) gives the same result as arr4.transpose(1,0,2).
那么四维呢？？？

In [78]:
arr5 = np.arange(60).reshape(2,3,2,5)
arr5

array([[[[ 0,  1,  2,  3,  4],
         [ 5,  6,  7,  8,  9]],

        [[10, 11, 12, 13, 14],
         [15, 16, 17, 18, 19]],

        [[20, 21, 22, 23, 24],
         [25, 26, 27, 28, 29]]],


       [[[30, 31, 32, 33, 34],
         [35, 36, 37, 38, 39]],

        [[40, 41, 42, 43, 44],
         [45, 46, 47, 48, 49]],

        [[50, 51, 52, 53, 54],
         [55, 56, 57, 58, 59]]]])

我们可以观察28

In [79]:
arr5[0,2,1,3]

28

In [80]:
arr5_t = arr5.T
arr5_t

array([[[[ 0, 30],
         [10, 40],
         [20, 50]],

        [[ 5, 35],
         [15, 45],
         [25, 55]]],


       [[[ 1, 31],
         [11, 41],
         [21, 51]],

        [[ 6, 36],
         [16, 46],
         [26, 56]]],


       [[[ 2, 32],
         [12, 42],
         [22, 52]],

        [[ 7, 37],
         [17, 47],
         [27, 57]]],


       [[[ 3, 33],
         [13, 43],
         [23, 53]],

        [[ 8, 38],
         [18, 48],
         [28, 58]]],


       [[[ 4, 34],
         [14, 44],
         [24, 54]],

        [[ 9, 39],
         [19, 49],
         [29, 59]]]])

In [81]:
arr5_t[3,1,2,0]

28

发现规律了！！不带参数的转置（.T）就是把轴的顺序反过来！！！原来轴是0，1，2，3，现在是3，2，1，0，shape也随之从(2,3,2,5)变成(5,2,3,2)。对于三维也适用。

In [82]:
arr5_t.shape

(5, 2, 3, 2)

## universal

## np.modf
返回小数部分和整数部分，分别放在两个array里（两部分都保留原数的符号）。
===

In [83]:
arr = np.random.randn(7)*5
arr

array([-6.11768355,  1.43707819, -3.44560286,  3.63351088, -0.71747536,
       -3.5013666 ,  3.4375686 ])

In [84]:
np.modf(arr)

(array([-0.11768355,  0.43707819, -0.44560286,  0.63351088, -0.71747536,
        -0.5013666 ,  0.4375686 ]), array([-6.,  1., -3.,  3., -0., -3.,  3.]))

## isnan

In [85]:
np.isnan(arr)

array([False, False, False, False, False, False, False])

## data_processing


## np.meshgrid

In [86]:
points = np.arange(-5, 5, 1)
points

array([-5, -4, -3, -2, -1,  0,  1,  2,  3,  4])

In [87]:
xs, ys = np.meshgrid(points, points)
xs

array([[-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4],
       [-5, -4, -3, -2, -1,  0,  1,  2,  3,  4]])

In [88]:
xs.shape

(10, 10)

In [89]:
# Euclidean distance of every grid point (xs, ys) from the origin.
z = np.sqrt(xs ** 2 + ys ** 2)
z

array([[7.07106781, 6.40312424, 5.83095189, 5.38516481, 5.09901951,
        5.        , 5.09901951, 5.38516481, 5.83095189, 6.40312424],
       [6.40312424, 5.65685425, 5.        , 4.47213595, 4.12310563,
        4.        , 4.12310563, 4.47213595, 5.        , 5.65685425],
       [5.83095189, 5.        , 4.24264069, 3.60555128, 3.16227766,
        3.        , 3.16227766, 3.60555128, 4.24264069, 5.        ],
       [5.38516481, 4.47213595, 3.60555128, 2.82842712, 2.23606798,
        2.        , 2.23606798, 2.82842712, 3.60555128, 4.47213595],
       [5.09901951, 4.12310563, 3.16227766, 2.23606798, 1.41421356,
        1.        , 1.41421356, 2.23606798, 3.16227766, 4.12310563],
       [5.        , 4.        , 3.        , 2.        , 1.        ,
        0.        , 1.        , 2.        , 3.        , 4.        ],
       [5.09901951, 4.12310563, 3.16227766, 2.23606798, 1.41421356,
        1.        , 1.41421356, 2.23606798, 3.16227766, 4.12310563],
       [5.38516481, 4.47213595, 3.6055512

In [90]:
# Render z as a grayscale image with a colour bar.
plt.imshow(z, cmap=plt.cm.gray)
plt.colorbar()
# Raw string: the backslash in the LaTeX "\sqrt" must reach matplotlib verbatim;
# in a normal string literal "\s" is an invalid escape sequence and triggers a warning.
plt.title(r"Image plot of $\sqrt{x^2 + y^2}$ for a grid of values")

Text(0.5,1,'Image plot of $\\sqrt{x^2 + y^2}$ for a grid of values')

In [91]:
xs**2 + ys**2

array([[50, 41, 34, 29, 26, 25, 26, 29, 34, 41],
       [41, 32, 25, 20, 17, 16, 17, 20, 25, 32],
       [34, 25, 18, 13, 10,  9, 10, 13, 18, 25],
       [29, 20, 13,  8,  5,  4,  5,  8, 13, 20],
       [26, 17, 10,  5,  2,  1,  2,  5, 10, 17],
       [25, 16,  9,  4,  1,  0,  1,  4,  9, 16],
       [26, 17, 10,  5,  2,  1,  2,  5, 10, 17],
       [29, 20, 13,  8,  5,  4,  5,  8, 13, 20],
       [34, 25, 18, 13, 10,  9, 10, 13, 18, 25],
       [41, 32, 25, 20, 17, 16, 17, 20, 25, 32]])

## Expressing conditional logic as array operations

In [92]:
xarr = np.array([1.1, 1.2, 1.3, 1.4, 1.5])

In [93]:
yarr = np.array([2.1, 2.2, 2.3, 2.4, 2.5])

In [94]:
cond = np.array([True, False, True, True, False])

下面代码的意思是，result里面装的东西，当c是true的时候，用x，当 c是false，用y

In [95]:
# Pure-Python element-wise selection: take the value from xarr where the
# condition is true, otherwise the value from yarr.
result = [
    x_val if use_x else y_val
    for x_val, y_val, use_x in zip(xarr, yarr, cond)
]
result

[1.1, 2.2, 1.3, 1.4, 2.5]

## np.where
下面这个方法更好！使用np.where

In [96]:
result = np.where(cond, xarr, yarr)
result

array([1.1, 2.2, 1.3, 1.4, 2.5])

np.where的好处是，可以根据原矩阵的数值，来得到新的矩阵。

In [97]:
arr = np.random.randn(4,4)
arr

array([[ 0.02898089, -0.84428608, -0.36779058, -0.42359129],
       [ 0.7773167 , -0.45746246,  0.61216003,  0.55246493],
       [ 2.35800646,  0.7733755 , -0.59653538,  0.23957353],
       [-0.91224608,  0.05099432,  1.93672297,  0.01699996]])

arr里大于0的元素变成2，其余的变成-2。

In [98]:
np.where(arr > 0, 2, -2)

array([[ 2, -2, -2, -2],
       [ 2, -2,  2,  2],
       [ 2,  2, -2,  2],
       [-2,  2,  2,  2]])

set only positive values to be 2

In [99]:
np.where(arr > 0, 2, arr)

array([[ 2.        , -0.84428608, -0.36779058, -0.42359129],
       [ 2.        , -0.45746246,  2.        ,  2.        ],
       [ 2.        ,  2.        , -0.59653538,  2.        ],
       [-0.91224608,  2.        ,  2.        ,  2.        ]])

In [100]:
test = np.array([1,2,3,4,-1,3,-5,-9,1])
np.where(test > 1)

(array([1, 2, 3, 5]),)

In [101]:
test = np.array([[1,2,3,4],[-1,3,-5,-9]])
np.where(test > 1)

(array([0, 0, 0, 1]), array([1, 2, 3, 1]))

In [102]:
test[np.where( test > 1 )]

array([2, 3, 4, 3])

np.where 也可以嵌套。

## Statistical_methods

In [103]:
arr = np.random.randn(5, 4)

In [104]:
arr.mean()

0.32356631556096066

In [105]:
np.mean(arr)

0.32356631556096066

In [106]:
arr.sum()

6.471326311219213

mean和sum这些函数，可以传入一个axis参数。
以最简单的二维为例。如果axis = 0，就是对每一列进行计算，每一列得到一个结果；如果axis = 1，就是对每一行进行计算，每一行得到一个结果。

In [107]:
arr = np.array([[1,2,3],[2,3,4]])

In [108]:
arr.mean(axis = 0)

array([1.5, 2.5, 3.5])

In [109]:
arr.sum(axis = 0)

array([3, 5, 7])

In [110]:
arr.mean(axis = 1)

array([2., 3.])

In [111]:
arr.sum(axis = 1)

array([6, 9])

还有一些method，不是把所有数据一起计算的，而是根据axis计算。
cumsum: 计算某一条轴上的累加和。cumprod同理，计算的是累乘积。

In [112]:
arr = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8]])

In [113]:
arr.cumsum(0)

array([[ 0,  1,  2],
       [ 3,  5,  7],
       [ 9, 12, 15]])

In [114]:
arr.cumprod(0)

array([[ 0,  1,  2],
       [ 0,  4, 10],
       [ 0, 28, 80]])

In [115]:
arr.cumsum(1)

array([[ 0,  1,  3],
       [ 3,  7, 12],
       [ 6, 13, 21]])

In [116]:
arr.cumprod(1)

array([[  0,   0,   0],
       [  3,  12,  60],
       [  6,  42, 336]])

其他一些method： std, var, min, max。argmin和argmax是输出第一次遇见的最大最小元素的index。

In [117]:
arr.argmax()

8

Methods for boolean arrays

In [118]:
arr = np.random.randn(10)
arr

array([-0.44009372,  0.96122914,  0.38874541,  0.9306418 ,  0.63345499,
       -1.466116  ,  0.98618269,  0.80357824,  0.360787  , -0.7884637 ])

In [119]:
# Count of positive entries (each True counts as 1) -- not the sum of the positive values.
(arr > 0).sum()

7

## Sorting
可以使用ndarray自带的sort()方法，它会in place地排序，直接修改原数组。

In [120]:
arr.sort()
arr

array([-1.466116  , -0.7884637 , -0.44009372,  0.360787  ,  0.38874541,
        0.63345499,  0.80357824,  0.9306418 ,  0.96122914,  0.98618269])

多维的矩阵，可以限定根据哪一个轴去排序。
下面这个例子就是一行一行排序。

In [121]:
arr = np.random.randn(5,3)
arr

array([[-0.23412133, -0.23680159,  1.16188871],
       [ 0.51634367, -1.64838541, -0.21094101],
       [-0.89966821, -0.87221449,  1.28083033],
       [-1.56996756,  2.51283386,  0.31369821],
       [ 0.66164525, -1.08393192, -0.24728652]])

In [122]:
arr.sort(1)
arr

array([[-0.23680159, -0.23412133,  1.16188871],
       [-1.64838541, -0.21094101,  0.51634367],
       [-0.89966821, -0.87221449,  1.28083033],
       [-1.56996756,  0.31369821,  2.51283386],
       [-1.08393192, -0.24728652,  0.66164525]])

In [123]:
# Now sort along axis 0, i.e. within each column.
arr.sort(0)
arr

array([[-1.64838541, -0.87221449,  0.51634367],
       [-1.56996756, -0.24728652,  0.66164525],
       [-1.08393192, -0.23412133,  1.16188871],
       [-0.89966821, -0.21094101,  1.28083033],
       [-0.23680159,  0.31369821,  2.51283386]])

np.sort()则不是in place地排序，而是复制了原矩阵，排序被复制的，原来矩阵不变。如下：

In [124]:
arr = np.random.randn(5,3)
np.sort(arr)

array([[-1.33622891e+00, -6.73539504e-01,  1.13150326e+00],
       [-1.19464207e+00,  1.40236454e-01,  8.23635664e-01],
       [-3.69341006e-01, -1.22342234e-03,  3.78130940e-01],
       [-1.04873656e-01,  4.40983887e-01,  2.46368729e+00],
       [-5.94514333e-01,  1.11591733e+00,  1.98206044e+00]])

In [125]:
arr

array([[-6.73539504e-01, -1.33622891e+00,  1.13150326e+00],
       [ 8.23635664e-01, -1.19464207e+00,  1.40236454e-01],
       [-3.69341006e-01,  3.78130940e-01, -1.22342234e-03],
       [ 4.40983887e-01, -1.04873656e-01,  2.46368729e+00],
       [ 1.98206044e+00,  1.11591733e+00, -5.94514333e-01]])

## Unique and other set logic
这个很重要！！
## np.unique
这个函数返回的结果是sort过的。

In [126]:
names = np.array(['Bob', 'Joe', 'Will', 'Bob', 'Will', 'Joe', 'Joe'])
np.unique(names)

array(['Bob', 'Joe', 'Will'], dtype='<U4')

In [127]:
ints = np.array([3, 3, 3, 2, 2, 1, 1, 4, 4])
np.unique(ints)

array([1, 2, 3, 4])

In [128]:
# For comparison, the pure-Python equivalent.
sorted(set(names))

['Bob', 'Joe', 'Will']

np.in1d的作用是逐个检查第一个数组的元素是否出现在第二个数组中，返回一个boolean数组，可以作为一种筛选的方法。（新版NumPy推荐使用等价的np.isin。）
## np.in1d

In [129]:
values = np.array([6, 0, 0, 3, 2, 5, 6])
# np.isin is the recommended replacement for np.in1d (deprecated since NumPy 2.0);
# for 1-D input it yields the same element-wise membership mask.
np.isin(values, [2, 3, 6])

array([ True, False, False,  True,  True, False,  True])

## other_functions

- np.intersect1d(x,y)返回x和y共同的元素（交集）。
- np.union1d(x,y)返回x和y的并集，即所有unique元素。
- np.setdiff1d(x,y)找出在x但不在y的元素（差集）。
- np.setxor1d(x,y)找出只在x或只在y中、不同时在两者中的元素（对称差集）。


In [130]:
values2 = np.array([6, 1, 1, 4, 2, 5, 6])


In [131]:
np.setdiff1d(values, values2)

array([0, 3])

In [132]:
np.setxor1d(values, values2)

array([0, 1, 3, 4])