# Numpy数组及其索引

In [2]:
from numpy import *

## 产生数组

In [3]:
lst = [0,1,2,3]
a = array(lst)
a

array([0, 1, 2, 3])

## 数组属性

In [4]:
type(a)

numpy.ndarray

In [5]:
a.dtype # 数据类型-64比特整数

dtype('int64')

In [6]:
a.itemsize # 每个元素所占字节

8

In [7]:
a.shape # shape of the array: a tuple holding the length of each dimension (this is not the element count; see a.size for that)

(4,)

In [8]:
shape(a)

(4,)

In [9]:
shape([1,2,3,4])

(4,)

In [10]:
a.size

4

In [11]:
size(a)

4

In [12]:
a.nbytes # 所有元素所占空间（数组所占空间要大于这个数，因为数组需要一个header来保存shape，dtype这样的信息）

32

In [13]:
a.ndim # 数组维数

1

## 使用fill方法设定初始值

In [15]:
# fill() overwrites every element in place. The value is cast to the array's
# existing dtype, so on this integer array -4.8 is stored as -4 (fraction dropped).
a.fill(-4.8)
a

array([-4, -4, -4, -4])

## 索引和切片

In [19]:
a = array([0,1,2,3])
a[0]

0

In [20]:
a[0] = 10
a

array([10,  1,  2,  3])

In [22]:
a = array([11,12,13,14,15])
a[1:3]

array([12, 13])

In [23]:
a[1:-2]

array([12, 13])

In [24]:
a[-4:3]

array([12, 13])

In [25]:
a[::2]

array([11, 13, 15])

In [26]:
a[-2:]

array([14, 15])

In [27]:
od = array([21000,21180,21240,22100,22400])

In [28]:
dist = od[1:] - od[:-1] # distance per day: each reading after the first minus the reading just before it (element-wise difference of two shifted slices)
dist

array([180,  60, 860, 300])

在本质上，numpy会把对array的各种计算交给预先编译好的C代码来执行，其内部循环类似于这样：
``` C
    int compute_sum(int *arr, int N) {
        int sum = 0;
        int i;
        for (i = 0; i < N; i++) {
            sum += arr[i];
        }
        return sum;
    }
```

## 多维数组及其属性

In [30]:
a = array([[0,1,2,3],[10,11,12,13]])
a

array([[ 0,  1,  2,  3],
       [10, 11, 12, 13]])

In [31]:
a.shape

(2, 4)

In [32]:
a.size

8

In [33]:
a.ndim

2

## 多维数组索引

In [34]:
a[1,3] # the comma-separated indices are passed as the tuple (1,3) and matched to the axes in order: row 1, column 3

13

In [36]:
a[1,3] = -1
a

array([[ 0,  1,  2,  3],
       [10, 11, 12, -1]])

In [37]:
a[1]

array([10, 11, 12, -1])

## 多维数组切片

In [39]:
a = array([[ 0, 1, 2, 3, 4, 5],
           [10,11,12,13,14,15],
           [20,21,22,23,24,25],
           [30,31,32,33,34,35],
           [40,41,42,43,44,45],
           [50,51,52,53,54,55]])
a

array([[ 0,  1,  2,  3,  4,  5],
       [10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25],
       [30, 31, 32, 33, 34, 35],
       [40, 41, 42, 43, 44, 45],
       [50, 51, 52, 53, 54, 55]])

In [40]:
a[0,3:5]

array([3, 4])

In [41]:
a[4:,4:]

array([[44, 45],
       [54, 55]])

In [42]:
a[:,2]

array([ 2, 12, 22, 32, 42, 52])

In [43]:
a[2::2,::2]

array([[20, 22, 24],
       [40, 42, 44]])

## 切片是引用

In [45]:
a = array([0,1,2,3,4])
b = a[2:4]
print(b)

[2 3]


切片在内存中使用的是引用机制。\
这意味着，numpy并没有为b分配新的内存空间来存储它的值，而是让b指向了a所分配的内存空间（b是a的一个视图），因此，改变b会改变a的值。

In [46]:
b[0] = 10
a

array([ 0,  1, 10,  3,  4])

而这种现象在列表中并不会出现：

In [48]:
a = [1,2,3,4,5]
b = a[2:3]
b[0] = 12324
a

[1, 2, 3, 4, 5]

这样做的好处在于，节省时间和内存空间。缺点在于，可能出现改变一个值导致另一个值改变的情况。\
一个解决办法是复制一个值，这个复制的值会申请新的内存：

In [49]:
a = array([0,1,2,3,4])
b = a[2:4].copy()
b[0] = 10
a

array([0, 1, 2, 3, 4])

## 花式索引

切片只能支持连续或者等间隔的切片操作，要想实现任意位置的操作，需要使用花式索引`fancy indexing`。

### 一维花式索引

In [50]:
a = arange(0,80,10)
a

array([ 0, 10, 20, 30, 40, 50, 60, 70])

In [51]:
indices = [1,2,-3]
y = a[indices]
y

array([10, 20, 50])

In [54]:
mask = array([0,1,1,0,0,1,0,0],dtype=bool)
a[mask]

array([10, 20, 50])

In [55]:
from numpy.random import rand
a = rand(10)
a

array([0.52357677, 0.39496297, 0.33287232, 0.0617018 , 0.91535453,
       0.80919887, 0.99048356, 0.19725527, 0.36222038, 0.00872035])

In [56]:
mask = a > 0.5
a[mask]

array([0.52357677, 0.91535453, 0.80919887, 0.99048356])

### 二维花式索引

In [57]:
a = array([[ 0, 1, 2, 3, 4, 5],
           [10,11,12,13,14,15],
           [20,21,22,23,24,25],
           [30,31,32,33,34,35],
           [40,41,42,43,44,45],
           [50,51,52,53,54,55]])
a

array([[ 0,  1,  2,  3,  4,  5],
       [10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25],
       [30, 31, 32, 33, 34, 35],
       [40, 41, 42, 43, 44, 45],
       [50, 51, 52, 53, 54, 55]])

In [58]:
a[(0,1,2,3,4),(1,2,3,4,5)]

array([ 1, 12, 23, 34, 45])

In [59]:
a[3:,[0,2,5]]

array([[30, 32, 35],
       [40, 42, 45],
       [50, 52, 55]])

In [61]:
mask = array([1,0,1,0,0,1],dtype=bool)
a[mask,2] # column index 2 (the third column), restricted to the rows where mask is True

array([ 2, 22, 52])

与切片不同，花式索引返回的是原对象的一个复制而不是引用。

### “不完全”索引

In [62]:
y = a[:3] # only the first axis is indexed: rows 0, 1 and 2 (the first three rows), with every column kept
y

array([[ 0,  1,  2,  3,  4,  5],
       [10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25]])

In [64]:
condition = array([0,1,1,0,1,0],dtype=bool)
a[condition,:]

array([[10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25],
       [40, 41, 42, 43, 44, 45]])

### 三维花式索引

In [65]:
# Build the integers 0..63 and arrange them as 4 blocks x 4 rows x 4 columns.
# reshape() is used instead of assigning to the .shape attribute in place,
# which the NumPy documentation discourages; the resulting array is the same.
a = arange(64).reshape(4,4,4)
a

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15]],

       [[16, 17, 18, 19],
        [20, 21, 22, 23],
        [24, 25, 26, 27],
        [28, 29, 30, 31]],

       [[32, 33, 34, 35],
        [36, 37, 38, 39],
        [40, 41, 42, 43],
        [44, 45, 46, 47]],

       [[48, 49, 50, 51],
        [52, 53, 54, 55],
        [56, 57, 58, 59],
        [60, 61, 62, 63]]])

In [67]:
y = a[:,:,[2,-1]] # every block, every row; the list picks column index 2 and column index -1 (third and last) individually, not a range between them
y

array([[[ 2,  3],
        [ 6,  7],
        [10, 11],
        [14, 15]],

       [[18, 19],
        [22, 23],
        [26, 27],
        [30, 31]],

       [[34, 35],
        [38, 39],
        [42, 43],
        [46, 47]],

       [[50, 51],
        [54, 55],
        [58, 59],
        [62, 63]]])

## where语句

array([[32, 33, 34, 35],
       [36, 37, 38, 39],
       [40, 41, 42, 43],
       [44, 45, 46, 47]])

### 一维数组

In [72]:
a = array([0,12,5,20])

In [73]:
a > 10

array([False,  True, False,  True])

In [74]:
where(a > 10)

(array([1, 3]),)

where函数会返回所有非零元素（对布尔数组来说即值为True的元素）的索引，返回值是一个元组，数组的每一维对应元组中的一个索引数组。

In [75]:
indices = where(a > 10)
indices = indices[0]
indices

array([1, 3])

In [76]:
indices = where(a > 10)[0]
indices

array([1, 3])

可以直接使用where的返回值进行索引

In [77]:
loc = where(a > 10)
a[loc]

array([12, 20])

### 多维数组

In [81]:
a = array([[0,12,5,20],[1,2,11,15]])
loc = where(a > 10)
loc

(array([0, 0, 1, 1]), array([1, 3, 2, 3]))

`(array([0, 0, 1, 1]), array([1, 3, 2, 3]))`代表：第0行第1列，第0行第3列，第1行第2列，第1行第3列

In [82]:
a[loc]

array([12, 20, 11, 15])

In [83]:
rows,cols = where(a > 10)

In [84]:
rows

array([0, 0, 1, 1])

In [85]:
cols

array([1, 3, 2, 3])

In [86]:
a[rows,cols]

array([12, 20, 11, 15])

In [88]:
# Build the integers 0..24 and arrange them as a 5 x 5 matrix.
# reshape() is used instead of assigning to the .shape attribute in place,
# which the NumPy documentation discourages; the resulting array is the same.
a = arange(25).reshape(5,5)
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [89]:
a > 12

array([[False, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])

In [90]:
where(a > 12)

(array([2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4]),
 array([3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4]))