# ndarray与Python相比的速度优势

In [5]:
import numpy as np
import random
import time

a = []
for i in range(100000000):
    a.append(random.random())

t1 = time.time()
sum1 = sum(a)
t2 = time.time()

b = np.array(a)
t3 = time.time()
sum2 = np.sum(b)
t4 = time.time()

print('原生函数使用时间：%f， ndarray使用时间：%f' %(t2-t1,t4-t3))

原生函数使用时间：0.557533， ndarray使用时间：0.169564


可以看到ndarray的速度有明显优势。

In [6]:
# ndarray

In [8]:
list1 = [1,2,3,4]
oneArray = np.array(list1)
type(oneArray)

numpy.ndarray

In [9]:
oneArray

array([1, 2, 3, 4])

### 创建ndarray的方式

In [10]:
# 直接传入列表

t1 = np.array([1,2,3])
print(t1)
print(type(t1))

[1 2 3]
<class 'numpy.ndarray'>


In [12]:
# 使用range序列

t2 = np.array(range(10))
print(t2)
print(type(t2))

[0 1 2 3 4 5 6 7 8 9]
<class 'numpy.ndarray'>


In [13]:
# 使用np.arange()

t3 = np.arange(0,10,2)
print(t3)
print(type(t3))

[0 2 4 6 8]
<class 'numpy.ndarray'>


### 创建2维数组

In [14]:
list2 = [[1,2],[3,4],[5,6]]

In [15]:
twoArray = np.array(list2)
twoArray

array([[1, 2],
       [3, 4],
       [5, 6]])

### 常用属性

In [16]:
# 获取数组的维度
twoArray.ndim

2

In [17]:
# 获取数组的形状
twoArray.shape

(3, 2)

In [18]:
# 获取元素的个数
twoArray.size

6

### 调整形状

In [19]:
four = np.array([[1,2,3],[4,5,6]])
four

array([[1, 2, 3],
       [4, 5, 6]])

In [20]:
four.shape = (3,2)
four

array([[1, 2],
       [3, 4],
       [5, 6]])

In [21]:
# 返回一个新的数组
four.reshape(3,2)
four

array([[1, 2],
       [3, 4],
       [5, 6]])

In [22]:
# 将多维数组变成一维数组
five = four.reshape((6,),order = 'F') # F 表示以列展开
five

array([1, 3, 5, 2, 4, 6])

In [23]:
six = four.flatten(order = 'F')
six

array([1, 3, 5, 2, 4, 6])

In [24]:
t = np.arange(24)
t

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16,
       17, 18, 19, 20, 21, 22, 23])

In [25]:
t.shape

(24,)

In [26]:
# 转换成二维数组

t1 = t.reshape((4,6))
t1

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [27]:
t1.shape

(4, 6)

In [28]:
# 转换为三维数组

t2 = t.reshape((2,3,4))
t2

array([[[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]],

       [[12, 13, 14, 15],
        [16, 17, 18, 19],
        [20, 21, 22, 23]]])

In [29]:
t2.shape

(2, 3, 4)

### 数组转为列表

In [31]:
a = np.array([9,12,88,14,25])
list_a = a.tolist()
a

array([ 9, 12, 88, 14, 25])

In [32]:
list_a

[9, 12, 88, 14, 25]

In [33]:
type(list_a)

list

### NumPy的数据类型

In [34]:
f = np.array([1,2,3,4,5], dtype = np.int16)
f

array([1, 2, 3, 4, 5], dtype=int16)

In [35]:
# 返回数组中每个元素的字节长度
f.itemsize

2

In [36]:
# 获取数据类型
f.dtype

dtype('int16')

In [37]:
# 调整数据类型
f1 = f.astype(np.int64)
f1.dtype

dtype('int64')

In [38]:
# 随机生成小数
round(random.random(),2) # 保留两位小数

0.32

In [40]:
arr = np.array([random.random() for i in range(10)])
arr

array([0.30860118, 0.3416511 , 0.17541592, 0.88069649, 0.11437206,
       0.88621786, 0.37626542, 0.78782377, 0.61445822, 0.78876178])

In [41]:
# 去小数点后两位
np.round(arr,2)

array([0.31, 0.34, 0.18, 0.88, 0.11, 0.89, 0.38, 0.79, 0.61, 0.79])

# 数组的计算

## 与数的计算

In [43]:
t1 = np.arange(24).reshape((6,4))
t1

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [44]:
t1 + 2

array([[ 2,  3,  4,  5],
       [ 6,  7,  8,  9],
       [10, 11, 12, 13],
       [14, 15, 16, 17],
       [18, 19, 20, 21],
       [22, 23, 24, 25]])

In [45]:
t1 * 2

array([[ 0,  2,  4,  6],
       [ 8, 10, 12, 14],
       [16, 18, 20, 22],
       [24, 26, 28, 30],
       [32, 34, 36, 38],
       [40, 42, 44, 46]])

In [46]:
t1 / 2

array([[ 0. ,  0.5,  1. ,  1.5],
       [ 2. ,  2.5,  3. ,  3.5],
       [ 4. ,  4.5,  5. ,  5.5],
       [ 6. ,  6.5,  7. ,  7.5],
       [ 8. ,  8.5,  9. ,  9.5],
       [10. , 10.5, 11. , 11.5]])

## 数组与数组之间

In [48]:
t1 = np.arange(24).reshape((6,4))
t2 = np.arange(100,124).reshape((6,4))

In [49]:
t1

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11],
       [12, 13, 14, 15],
       [16, 17, 18, 19],
       [20, 21, 22, 23]])

In [50]:
t2

array([[100, 101, 102, 103],
       [104, 105, 106, 107],
       [108, 109, 110, 111],
       [112, 113, 114, 115],
       [116, 117, 118, 119],
       [120, 121, 122, 123]])

In [51]:
t1 + t2

array([[100, 102, 104, 106],
       [108, 110, 112, 114],
       [116, 118, 120, 122],
       [124, 126, 128, 130],
       [132, 134, 136, 138],
       [140, 142, 144, 146]])

In [52]:
t1 * t2

array([[   0,  101,  204,  309],
       [ 416,  525,  636,  749],
       [ 864,  981, 1100, 1221],
       [1344, 1469, 1596, 1725],
       [1856, 1989, 2124, 2261],
       [2400, 2541, 2684, 2829]])

满足矩阵运算，所以不同形状的多维数组不能四则运算。

但如果行数或者列数相同的数组可以进行运算：

In [53]:
# 行形状相同
t1 = np.arange(24).reshape((4,6))
t2 = np.arange(0,6)

In [55]:
t1

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [56]:
t2

array([0, 1, 2, 3, 4, 5])

In [57]:
t1 - t2

array([[ 0,  0,  0,  0,  0,  0],
       [ 6,  6,  6,  6,  6,  6],
       [12, 12, 12, 12, 12, 12],
       [18, 18, 18, 18, 18, 18]])

# 轴

轴即是坐标轴，对于一位数组，只有一个坐标轴（0轴），对于二维数组有两个，三维数组有三个轴（0,1,2）。  
在轴的概念下我们才能计算，如计算一个二维数组的平均值，不同轴上值不同。

In [58]:
a = np.array([[1,2,3],[4,5,6]])
a

array([[1, 2, 3],
       [4, 5, 6]])

In [59]:
np.sum(a,axis = 0)

array([5, 7, 9])

In [60]:
np.sum(a,axis = 1)

array([ 6, 15])

In [61]:
np.sum(a)

21

对三维数组：

In [62]:
a = np.arange(27).reshape((3,3,3))
a

array([[[ 0,  1,  2],
        [ 3,  4,  5],
        [ 6,  7,  8]],

       [[ 9, 10, 11],
        [12, 13, 14],
        [15, 16, 17]],

       [[18, 19, 20],
        [21, 22, 23],
        [24, 25, 26]]])

In [63]:
np.sum(a,axis = 0)

array([[27, 30, 33],
       [36, 39, 42],
       [45, 48, 51]])

In [64]:
np.sum(a, axis = 1) # 变成了3个 1*3的矩阵

array([[ 9, 12, 15],
       [36, 39, 42],
       [63, 66, 69]])

In [65]:
np.sum(a, axis = 2)

array([[ 3, 12, 21],
       [30, 39, 48],
       [57, 66, 75]])

# 数组的索引和切片

In [66]:
a = np.arange(10)
a

array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [67]:
a[2:7:2]

array([2, 4, 6])

In [70]:
a[2:]

array([2, 3, 4, 5, 6, 7, 8, 9])

In [71]:
a[2]

2

对一位数组，方法相同

多维数组的操作方法:

In [73]:
t1 = np.arange(24).reshape(4,6)
t1

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [74]:
# 取一行，索引还是从0开始
t1[1]

array([ 6,  7,  8,  9, 10, 11])

In [75]:
# 取一行
t1[1,:]

array([ 6,  7,  8,  9, 10, 11])

In [76]:
# 取连续的多行
t1[1:]

array([[ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [77]:
# 取连续的多行
t1[1:3,:]

array([[ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17]])

In [78]:
# 取不连续的多行
t1[[0,2,3]]

array([[ 0,  1,  2,  3,  4,  5],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [79]:
# 取不连续的多行
t1[[0,2,3],:]

array([[ 0,  1,  2,  3,  4,  5],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [80]:
# 取一列
t1[:,1]

array([ 1,  7, 13, 19])

In [81]:
t1[:,1:]

array([[ 1,  2,  3,  4,  5],
       [ 7,  8,  9, 10, 11],
       [13, 14, 15, 16, 17],
       [19, 20, 21, 22, 23]])

In [82]:
t1[:,[0,2,3]]

array([[ 0,  2,  3],
       [ 6,  8,  9],
       [12, 14, 15],
       [18, 20, 21]])

In [83]:
# 取第三行第四列的值
t1[2,3]

15

In [84]:
# 取多个位置的值
t1[[0,1,1],[0,1,3]]

array([0, 7, 9])

# 数组中的数值修改

In [85]:
t = np.arange(24).reshape(4,6)
t

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [86]:
# 修改某一行的值
t[1,:] = 0
t

array([[ 0,  1,  2,  3,  4,  5],
       [ 0,  0,  0,  0,  0,  0],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [87]:
# 修改某一列的值
t[:,1] = 0
t

array([[ 0,  0,  2,  3,  4,  5],
       [ 0,  0,  0,  0,  0,  0],
       [12,  0, 14, 15, 16, 17],
       [18,  0, 20, 21, 22, 23]])

In [88]:
t = np.arange(24).reshape(4,6)
t

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [89]:
t[1:3,:] = 0
t

array([[ 0,  1,  2,  3,  4,  5],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [18, 19, 20, 21, 22, 23]])

In [90]:
t[:,1:3] = 0
t

array([[ 0,  0,  0,  3,  4,  5],
       [ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0,  0,  0],
       [18,  0,  0, 21, 22, 23]])

In [91]:
t = np.arange(24).reshape(4,6)
t

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [93]:
t[1:3,2:4] = 0
t

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  0,  0, 10, 11],
       [12, 13,  0,  0, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [94]:
t[[0,1],[3,5]] = 0
t

array([[ 0,  1,  2,  0,  4,  5],
       [ 6,  7,  0,  0, 10,  0],
       [12, 13,  0,  0, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [95]:
t = np.arange(24).reshape(4,6)
t

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

或者可以直接安州逻辑条件判断修改:

In [96]:
t<10

array([[ True,  True,  True,  True,  True,  True],
       [ True,  True,  True,  True, False, False],
       [False, False, False, False, False, False],
       [False, False, False, False, False, False]])

In [97]:
t[t<10] = 0
t

array([[ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [119]:
t = np.arange(24).reshape(4,6)
t

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [120]:
t1 = np.arange(24).reshape(4,6)
t1[(t1>2)&(t1<10)] = 0
t1

array([[ 0,  1,  2,  0,  0,  0],
       [ 0,  0,  0,  0, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [121]:
t2 = np.arange(24).reshape(4,6)
t2[(t2<10)|(t2>19)] = 0
t2

array([[ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  0,  0, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19,  0,  0,  0,  0]])

In [122]:
t3 = np.arange(24).reshape(4,6)
t3[~(t3>7)] = 0
t3

array([[ 0,  0,  0,  0,  0,  0],
       [ 0,  0,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

 三目运算：


In [123]:
score = np.array([[80,88],[82,81],[75,81]])
score

array([[80, 88],
       [82, 81],
       [75, 81]])

In [125]:
result = np.where(score>80,True,False)
result

array([[False,  True],
       [ True,  True],
       [False,  True]])

In [126]:
score>80

array([[False,  True],
       [ True,  True],
       [False,  True]])

# 数组的添加、删除和去重

## 数组的添加

1. `numpy.append()` 函数在数组的末位添加值。追加操作会分配整个数组，并把原来的数组复制到新数组中。 特别的，输入数组的维度必须匹配，否则将产生ValueError

In [128]:
a = np.array([[1,2,3],[4,5,6]])
a

array([[1, 2, 3],
       [4, 5, 6]])

In [134]:
np.append(a,[7,8,9])

array([1, 2, 3, 4, 5, 6, 7, 8, 9])

In [136]:
np.append(a,[[7,8,9]],axis = 0)

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [137]:
np.append(a,[[5,5,5],[7,8,9]],axis = 1)

array([[1, 2, 3, 5, 5, 5],
       [4, 5, 6, 7, 8, 9]])

2. `numpy.insert()` 函数在给定索引之前，沿给定州在输入数组中插入值。  
    如果只的类型转为插入，则它与输入数组不同。 插入函数会返回一个新数组。  
    特别的，如果没提供轴，则数组会被展开。

In [138]:
a = np.array([[1,2,],[3,4],[5,6]])
a

array([[1, 2],
       [3, 4],
       [5, 6]])

In [139]:
np.insert(a,3,[11,12])

array([ 1,  2,  3, 11, 12,  4,  5,  6])

In [140]:
np.insert(a,1,[11],axis = 0)

array([[ 1,  2],
       [11, 11],
       [ 3,  4],
       [ 5,  6]])

## 数组的删除

`numpy.delete()` 函数返回从输入数组中删除指定子数组的新数组。 与 `insert()`函数相同，如果未提供轴参数，则输入数组将展开。

In [141]:
a = np.arange(12).reshape(3,4)
a

array([[ 0,  1,  2,  3],
       [ 4,  5,  6,  7],
       [ 8,  9, 10, 11]])

In [142]:
np.delete(a,1,axis = 1)

array([[ 0,  2,  3],
       [ 4,  6,  7],
       [ 8, 10, 11]])

In [143]:
np.delete(a,5)

array([ 0,  1,  2,  3,  4,  6,  7,  8,  9, 10, 11])

## 数组去重

`numpy.unique()` 函数用于去除数组中重复的元素。

In [144]:
a = np.array([5,2,6,2,7,5,6,8,2,9])
a

array([5, 2, 6, 2, 7, 5, 6, 8, 2, 9])

In [147]:
u = np.unique(a)
u

array([2, 5, 6, 7, 8, 9])

In [149]:
u,indices = np.unique(a, return_index = True)
indices

array([1, 0, 2, 4, 7, 9], dtype=int64)

In [150]:
a

array([5, 2, 6, 2, 7, 5, 6, 8, 2, 9])

In [151]:
u,indices = np.unique(a,return_inverse = True)
u

array([2, 5, 6, 7, 8, 9])

In [152]:
indices

array([1, 0, 2, 0, 3, 1, 2, 4, 0, 5], dtype=int64)

# numpy的计算

In [153]:
score = np.array([[80,88],[82,81],[75,81]])
score

array([[80, 88],
       [82, 81],
       [75, 81]])

In [154]:
np.max(score)

88

In [155]:
np.max(score,axis = 0)

array([82, 88])

In [157]:
np.min(score)

75

In [158]:
np.min(score,axis = 1)

array([80, 81, 75])

In [160]:
np.maximum([-2,-1,0,1,2],0)

array([0, 0, 0, 1, 2])

In [162]:
np.minimum([-2,-1,0,1,2],0)

array([-2, -1,  0,  0,  0])

In [163]:
np.maximum([-2,-1,0,1,2],[1,2,3,4,5])

array([1, 2, 3, 4, 5])

In [164]:
np.mean(score)

81.16666666666667

In [165]:
np.mean(score,axis = 0)

array([79.        , 83.33333333])

In [166]:
arr = np.array([[1,2,3],[4,5,6]])
arr

array([[1, 2, 3],
       [4, 5, 6]])

In [167]:
arr.cumsum(0)

array([[1, 2, 3],
       [5, 7, 9]], dtype=int32)

In [170]:
arr.cumsum(1)

array([[ 1,  3,  6],
       [ 4,  9, 15]], dtype=int32)

```
[1, 2, 3] -------> [1, 2+1, 3+2+1]
[4, 5, 6] -------> [4, 5+4, 6+5+4]
```

In [172]:
np.argmin(score,axis = 0)

array([2, 1], dtype=int64)

In [173]:
np.std(score,axis = 0)

array([2.94392029, 3.29983165])

In [174]:
np.ptp(score)

13

# 数组的拼接

In [175]:
a = np.array([[1,2],[3,4]])
a

array([[1, 2],
       [3, 4]])

In [177]:
b = np.array([[5,6],[7,8]])
b

array([[5, 6],
       [7, 8]])

In [178]:
# 两数组维度相同时可以拼接
np.concatenate((a,b),axis = 0)

array([[1, 2],
       [3, 4],
       [5, 6],
       [7, 8]])

In [179]:
np.concatenate((a,b),axis = 1)

array([[1, 2, 5, 6],
       [3, 4, 7, 8]])

In [180]:
# 根据轴进行堆叠
np.stack((a,b),axis = 0)

array([[[1, 2],
        [3, 4]],

       [[5, 6],
        [7, 8]]])

In [181]:
np.stack((a,b),axis = 1)

array([[[1, 2],
        [5, 6]],

       [[3, 4],
        [7, 8]]])

In [182]:
# 矩阵垂直拼接
v1 = [[0,1,2,3,4,5],
      [6,7,8,9,10,11]]
v1

[[0, 1, 2, 3, 4, 5], [6, 7, 8, 9, 10, 11]]

In [183]:
v2 = [[12,13,14,15,16,17],
      [18,19,20,21,22,23]]
v2

[[12, 13, 14, 15, 16, 17], [18, 19, 20, 21, 22, 23]]

In [184]:
np.vstack((v1,v2))

array([[ 0,  1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10, 11],
       [12, 13, 14, 15, 16, 17],
       [18, 19, 20, 21, 22, 23]])

In [185]:
# 矩阵水平拼接
np.hstack((v1,v2))

array([[ 0,  1,  2,  3,  4,  5, 12, 13, 14, 15, 16, 17],
       [ 6,  7,  8,  9, 10, 11, 18, 19, 20, 21, 22, 23]])

# 数组的分割

将一个数组分割为多个数组：