# NumPy Arrays and Indexing

In [1]:
import numpy as np

# Creating Arrays

In [2]:
lst=[0,1,2,3]
a = np.array(lst)
a

array([0, 1, 2, 3])

Or we can write this array directly:

In [3]:
a = np.array([0,1,2,3])

# Properties of Arrays

In [4]:
type(a)

numpy.ndarray

In [5]:
# dtype of the elements; the output below shows 32-bit integers
# (the default integer width is platform-dependent)
a.dtype

dtype('int32')

In [6]:
a.itemsize

4

In [7]:
# shape is returned as a tuple; for a one-dimensional array it has a single entry
a.shape

(4,)

Or use

In [8]:
np.shape(a)

(4,)

Check the number of elements

In [9]:
a.size

4

Or

In [10]:
np.size(a)

4

In [11]:
# total number of bytes used by the elements (size * itemsize)
a.nbytes

16

Note: The array actually occupies more memory than this, because it also needs a header that stores metadata such as its shape and dtype.

In [12]:
# check the dimension
a.ndim

1

# Use the `fill` method to set every element to a given value

In [13]:
a.fill(-4.8) # the array holds integers, so the float -4.8 is cast to int (truncated toward zero) and stored as -4
a

array([-4, -4, -4, -4])

Note, however, that all elements of an array must have the same type, so the value passed to `fill` is converted to the array's dtype.

# Indexing and Slicing

In [14]:
a = np.array([0,1,2,3,4,5,6])
a[0]

0

In [15]:
a[0]=10
a

array([10,  1,  2,  3,  4,  5,  6])

In [16]:
a[1:3]

array([1, 2])

In [17]:
# from the second element up to, but not including, the second-to-last element
a[1:-2]

array([1, 2, 3, 4])

In [19]:
# from the fourth-to-last element through the sixth element (index 6 itself is excluded)
a[-4:6]

array([3, 4, 5])

In [20]:
# every second element (step of 2)
a[::2]

array([10,  2,  4,  6])

In [21]:
a[-2:]

array([5, 6])

假设我们记录一辆汽车表盘上每天显示的里程数：

In [22]:
od = np.array([21000,21180,21240,22100,22400])

In [24]:
# Distance driven each day: the difference between consecutive odometer readings
dist = od[1:] - od[:-1]
dist

array([180,  60, 860, 300])

在本质上，NumPy对array的各种计算是由类似下面这样的已编译C代码完成的：
    // Sum the first N elements of arr and return the total.  
    int compute_sum(int *arr, int N) {  
        int sum = 0;  
        int i;  
        for (i = 0; i < N; i++) {  
            sum += arr[i];  
        }  
        return sum;  
    }

# 多维数组

In [28]:
a = np.array([[0,1,2,3],
             [10,11,12,13]])
a

array([[ 0,  1,  2,  3],
       [10, 11, 12, 13]])

Note: 实际上我们是传入了以列表为元素的列表，从而得到一个二维数组。

In [29]:
a.shape

(2, 4)

Note: 2 是行数，4是列数

In [30]:
# check the dimension
a.ndim

2

# 多维数组索引

In [31]:
a[1,3]

13

Note：其中，1是行索引，3是列索引，中间使用逗号隔开，事实上Python会使用元组（1,3）来进行索引

In [32]:
a[1,3]=-1
a

array([[ 0,  1,  2,  3],
       [10, 11, 12, -1]])

In [33]:
# a single index selects a whole row: the second row is returned as a 1-D array
a[1]

array([10, 11, 12, -1])

# 多维数组切片

In [36]:
a = np.array([[0,1,2,3,4,5],
           [10,11,12,13,14,15],
           [20,21,22,23,24,25],
           [30,31,32,33,34,35],
           [40,41,42,43,44,45],
          [50,51,52,53,54,55]])
a

array([[ 0,  1,  2,  3,  4,  5],
       [10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25],
       [30, 31, 32, 33, 34, 35],
       [40, 41, 42, 43, 44, 45],
       [50, 51, 52, 53, 54, 55]])

In [37]:
a[0,3:5]

array([3, 4])

In [38]:
a[4:,4:]

array([[44, 45],
       [54, 55]])

In [39]:
a[:,2]

array([ 2, 12, 22, 32, 42, 52])

Note: 每一维都支持切片的规则，包括负索引，省略：  
    [lower:upper:step]   
    For example, to pick the odd-numbered columns (1st, 3rd and 5th) of rows 3 and 5:

In [40]:
a[2::2,::2]

array([[20, 22, 24],
       [40, 42, 44]])

# 切片是引用

In [42]:
a = np.array([0,1,2,3,4])
b = a[2:4]
print(b)

[2 3]


引用机制意味着Python并没有为`b`分配新的空间来存储它的值，而是让`b`指向了`a`所分配的内存空间，因此，改变`b`会改变`a`的值：

In [45]:
b[0]=10
a

array([ 0,  1, 10,  3,  4])

In [46]:
b

array([10,  3])

而在列表中这种现象不会存在：

In [48]:
a = [1,2,3,4,5]
b = a[2:4]
b[0]=12321
print(a)
print(b)

[1, 2, 3, 4, 5]
[12321, 4]


这样做的好处是，对于很大的数组，不用大量复制多余的值，节约了空间。  
    缺点在于，可能出现改变一个值而改变其他值的情况。

可以使用copy()方法产生一个复制（会申请新的内存）

In [49]:
a = np.array([0,1,2,3,4])
b = a[2:4].copy()
b[0]=12321
print(a)
print(b)

[0 1 2 3 4]
[12321     3]


In [50]:
a

array([0, 1, 2, 3, 4])

In [51]:
b

array([12321,     3])

# 花式索引

切片只能支持连续或者等间隔的切片操作，花式索引（`fancy indexing`）可以实现任意位置的操作

## 一维花式索引


np.arange的使用方法

In [109]:
A = np.arange(5)  # only the stop value is given
print(A)  # prints [0 1 2 3 4]; the stop value is not included
print(type(A))  # prints <class 'numpy.ndarray'>

A = np.arange(1, 5)  # start at 1; the step defaults to 1
print(A)  # prints [1 2 3 4]

A = np.arange(1, 5, 2)  # step of 2
print(A)  # prints [1 3]

A = np.arange(1, 5.2, 0.6)  # with float arguments the result may not match expectations exactly
print(A)  # prints [1.  1.6 2.2 2.8 3.4 4.  4.6 5.2]; the stop value 5.2 appears because of floating-point rounding


[0 1 2 3 4]
<class 'numpy.ndarray'>
[1 2 3 4]
[1 3]
[1.  1.6 2.2 2.8 3.4 4.  4.6 5.2]


In [53]:
a=np.arange(0,80,10)
a

array([ 0, 10, 20, 30, 40, 50, 60, 70])

In [54]:
# fancy indexing: pass a list of the index positions to pick (negative indices are allowed)
indices = [1,2,-3]
y = a[indices]
print(y)

[10 20 50]


还可以用布尔数组来实现花式索引：

In [56]:
mask = np.array([0,1,1,0,0,1,0,0],
            dtype=bool)

In [57]:
a[mask]

array([10, 20, 50])

Note: 使用mask进行索引并不会改变原数组的值

或者我们用布尔表达式生成`mask`，选出所有大于0.5的值：

In [58]:
from numpy.random import rand
a = rand(10)
a

array([0.37888858, 0.14246066, 0.61436093, 0.20372216, 0.47068793,
       0.68150869, 0.71722817, 0.16192538, 0.5255508 , 0.84404322])

In [59]:
mask= a>0.5
a[mask]

array([0.61436093, 0.68150869, 0.71722817, 0.5255508 , 0.84404322])

Note: the mask must be a boolean array

## 二维花式索引

In [61]:
a=np.array([[0,1,2,3,4,5],
        [10,11,12,13,14,15],
        [20,21,22,23,24,25],
        [30,31,32,33,34,35],
        [40,41,42,43,44,45],
        [50,51,52,53,54,55]])
a

array([[ 0,  1,  2,  3,  4,  5],
       [10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25],
       [30, 31, 32, 33, 34, 35],
       [40, 41, 42, 43, 44, 45],
       [50, 51, 52, 53, 54, 55]])

对于二维花式索引，需要给定`row`和`col`的值：

In [63]:
a[(0,1,2,3,4),(1,2,3,4,5)]

array([ 1, 12, 23, 34, 45])

返回的是主对角线上方相邻的那条对角线上的5个值

In [65]:
# select rows 4 to 6 and columns 1, 3 and 6 (counting from 1)
a[3:,[0,2,5]]

array([[30, 32, 35],
       [40, 42, 45],
       [50, 52, 55]])

也可以使用mask索引：

In [68]:
mask=np.array([1,0,1,0,0,1],dtype=bool)
a[mask,2]

array([ 2, 22, 52])

In [69]:
a

array([[ 0,  1,  2,  3,  4,  5],
       [10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25],
       [30, 31, 32, 33, 34, 35],
       [40, 41, 42, 43, 44, 45],
       [50, 51, 52, 53, 54, 55]])

In [70]:
mask= a>20
a[mask]

array([21, 22, 23, 24, 25, 30, 31, 32, 33, 34, 35, 40, 41, 42, 43, 44, 45,
       50, 51, 52, 53, 54, 55])

Note：与切片不同，花式索引返回的是原对象的一个复制而不是引用。

## 不完全索引

只给定行索引的时候，返回整行：

In [72]:
# take the first three rows
y = a[:3]
y

array([[ 0,  1,  2,  3,  4,  5],
       [10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25]])

In [76]:
condition=np.array([0,1,1,0,1,0],dtype=bool) # the mask needs one entry per row of a (6 here)
a[condition]

array([[10, 11, 12, 13, 14, 15],
       [20, 21, 22, 23, 24, 25],
       [40, 41, 42, 43, 44, 45]])

## 三维花式索引

In [83]:
a=np.arange(64)
a.shape=4,4,4 # the new dimensions must multiply to the number of elements (64), so 2,3,4 or 4,4,4,4 are rejected
a

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11],
        [12, 13, 14, 15]],

       [[16, 17, 18, 19],
        [20, 21, 22, 23],
        [24, 25, 26, 27],
        [28, 29, 30, 31]],

       [[32, 33, 34, 35],
        [36, 37, 38, 39],
        [40, 41, 42, 43],
        [44, 45, 46, 47]],

       [[48, 49, 50, 51],
        [52, 53, 54, 55],
        [56, 57, 58, 59],
        [60, 61, 62, 63]]])

In [85]:
y=a[1:,:,[2,-1]] # the first index selects the block, the second the row, the third the column
y

array([[[18, 19],
        [22, 23],
        [26, 27],
        [30, 31]],

       [[34, 35],
        [38, 39],
        [42, 43],
        [46, 47]],

       [[50, 51],
        [54, 55],
        [58, 59],
        [62, 63]]])

# where语句

where(array)  
`where` returns the indices of all elements that are non-zero (i.e. `True` for a boolean array)

## One-dimension Array

In [87]:
a=np.array([0,12,5,20])

In [88]:
# test element-wise whether each value is greater than 10
a>10

array([False,  True, False,  True])

In [90]:
# indices of the elements that are greater than 10
np.where(a>10)

(array([1, 3], dtype=int64),)

Or

In [91]:
indices=np.where(a>10)[0] # np.where returns a tuple with one index array per dimension; [0] takes the array for the only axis
indices

array([1, 3], dtype=int64)

也可以直接用`where`返回的值进行索引：

In [94]:
loc=np.where(a>10)
a[loc]

array([12, 20])

## 多维数组

In [96]:
a = np.array([[0,12,5,20],
              [1,2,11,15]])
loc = np.where(a>10)
print(loc)

(array([0, 0, 1, 1], dtype=int64), array([1, 3, 2, 3], dtype=int64))


Note：返回的结果是一个包含两个数组的元组，每个数组分别给出对应维度上的索引值

In [97]:
a[loc]

array([12, 20, 11, 15])

Or

In [99]:
rows, cols = np.where(a>10)

In [100]:
rows

array([0, 0, 1, 1], dtype=int64)

In [102]:
cols

array([1, 3, 2, 3], dtype=int64)

In [103]:
a[rows,cols]

array([12, 20, 11, 15])

Look at another example:

In [105]:
a = np.arange(25)
a.shape = 5,5
a

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14],
       [15, 16, 17, 18, 19],
       [20, 21, 22, 23, 24]])

In [106]:
a > 12

array([[False, False, False, False, False],
       [False, False, False, False, False],
       [False, False, False,  True,  True],
       [ True,  True,  True,  True,  True],
       [ True,  True,  True,  True,  True]])

In [108]:
np.where(a>12)

(array([2, 2, 3, 3, 3, 3, 3, 4, 4, 4, 4, 4], dtype=int64),
 array([3, 4, 0, 1, 2, 3, 4, 0, 1, 2, 3, 4], dtype=int64))

# 小结

## 几个函数

np.array  
np.where  
np.arange  

## 切片

a[lower:upper:step]  
切片是引用  


## 花式索引

np.arange(lower,upper,step)  

mask（布尔数组）索引的使用  

where函数的使用
