## Numpy对数组按索引查询
三种方法：
* 基础索引
* 神奇索引
* 布尔索引

In [1]:
import numpy as np

In [2]:
# 一维向量
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [4]:
# 二维数组（矩阵），一般用大写字母
X = np.arange(20).reshape(4, 5)
X

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

### 基础索引

#### 一维数组
和Python的List一样

In [5]:
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [6]:
print(x[2], x[5], x[-1])

2 5 9


In [8]:
x[2:4]

array([2, 3])

In [9]:
x[2:-1]

array([2, 3, 4, 5, 6, 7, 8])

In [10]:
x[-3:]

array([7, 8, 9])

In [11]:
x[:-3]

array([0, 1, 2, 3, 4, 5, 6])

#### 二维数组

In [12]:
X

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [13]:
# 分别用行坐标、列坐标，实现行列筛选
# X[0][0]
X[0, 0]

0

In [14]:
X[-1, 2]

17

In [15]:
# 可以省略后续索引值，返回的数据是降低一个维度的数组
# 这里的2，其实是要筛选索引为2的行（索引从0开始，即第3行）
X[2]

array([10, 11, 12, 13, 14])

In [16]:
# 筛选-1对应的行
X[-1]

array([15, 16, 17, 18, 19])

In [17]:
# 筛选多行
X[:-1]

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [18]:
# 筛选多行，然后筛选多列
X[:2, 2:4]

array([[2, 3],
       [7, 8]])

In [19]:
# 筛选所有行，然后筛选索引为2的单列（结果降为一维数组）
X[:, 2]

array([ 2,  7, 12, 17])

#### 注意：切片的修改会修改原来的数组
原因：切片返回的是原数组的视图（view）而不是拷贝；Numpy经常要处理大数组，这样可以避免每次都复制

In [20]:
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [21]:
x[2:4]=666
x

array([  0,   1, 666, 666,   4,   5,   6,   7,   8,   9])

In [22]:
X[:1, :2] = 666
X

array([[666, 666,   2,   3,   4],
       [  5,   6,   7,   8,   9],
       [ 10,  11,  12,  13,  14],
       [ 15,  16,  17,  18,  19]])

### 神奇索引
其实就是：用整数数组进行的索引，叫神奇索引

#### 一维数组

In [24]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [25]:
x[[3,4,7]]

array([3, 4, 7])

In [27]:
indexs = np.array([[0, 2], [1, 3]])
x[indexs]

array([[0, 2],
       [1, 3]])

#### 实例：获取数组中最大的前N个数字

In [29]:
# 随机生成10个[1, 100)区间内的整数（包含1，不包含100）
arr = np.random.randint(1, 100, 10)
arr

array([30, 49, 39, 29, 69, 95, 71, 91, 43, 78])

In [30]:
# arr.argsort()会返回排序后的索引index
# 取最大的3个值对应的下标（按值从小到大排列）
arr.argsort()[-3:]

array([9, 7, 5])

In [31]:
arr[arr.argsort()[-3:]]

array([78, 91, 95])

#### 二维数组

In [33]:
X = np.arange(20).reshape(4, 5)
X

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [34]:
# 筛选多行，列可以省略
X[[0, 2]]

array([[ 0,  1,  2,  3,  4],
       [10, 11, 12, 13, 14]])

In [35]:
X[[0, 2], :]

array([[ 0,  1,  2,  3,  4],
       [10, 11, 12, 13, 14]])

In [36]:
# 筛选多列，行不能省略
X[:, [0, 2, 3]]

array([[ 0,  2,  3],
       [ 5,  7,  8],
       [10, 12, 13],
       [15, 17, 18]])

In [37]:
# 同时指定行列-列表
# 返回的是[(0,1), (2,3), (3,4)]位置的数字
X[[0, 2, 3], [1, 3, 4]]

array([ 1, 13, 19])

### 布尔索引
注意：布尔索引选择（读取）出来的数据是数组的拷贝；但直接对布尔索引赋值（如 x[x>5] = 1）会修改原数组

#### 一维数组

In [38]:
# 将数组还原
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [39]:
x > 5

array([False, False, False, False, False, False,  True,  True,  True,
        True])

In [40]:
x[x>5]

array([6, 7, 8, 9])

In [41]:
# 实例：把一维数组进行01化处理
# 比如把房价数字，变成“高房价”为1，“低房价”为0
x[x<=5] = 0
x[x>5] = 1
x

array([0, 0, 0, 0, 0, 0, 1, 1, 1, 1])

In [43]:
x = np.arange(10)
x[x<5] += 20
x

array([20, 21, 22, 23, 24,  5,  6,  7,  8,  9])

#### 二维数组

In [45]:
X = np.arange(20).reshape(4, 5)
X

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [46]:
X > 5

array([[False, False, False, False, False],
       [False,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])

In [47]:
# X>5的boolean数组与X形状相同，既有行，又有列
# 因此返回的是所有满足条件的元素，按行优先顺序排列成的一维结果
X[X>5]

array([ 6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19])

In [48]:
# 举例：怎样把索引为3的列（即第4列）大于5的行筛选出来
X[:, 3]

array([ 3,  8, 13, 18])

In [50]:
X[:, 3] > 5

array([False,  True,  True,  True])

In [51]:
# 这里是按照行进行的筛选
X[X[:, 3] > 5]

array([[ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19]])

In [52]:
X[X[:, 3] > 5] = 666
X

array([[  0,   1,   2,   3,   4],
       [666, 666, 666, 666, 666],
       [666, 666, 666, 666, 666],
       [666, 666, 666, 666, 666]])

#### 条件的组合

In [53]:
x = np.arange(10)
x

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [54]:
# 注意：每个条件都得加小括号
condition = (x % 2 == 0) | (x > 7)
condition

array([ True, False,  True, False,  True, False,  True, False,  True,
        True])

In [55]:
x[condition]

array([0, 2, 4, 6, 8, 9])