## Numpy属性

- ndim: 维度（数组的轴数）
- shape: 形状，行和列数
- size:元素个数



In [1]:
import numpy as np

raw_a = [[1, 1, 1], [2, 2, 2], [3, 4, 5]]
np_a = np.array(raw_a)
print(np_a)

[[1 1 1]
 [2 2 2]
 [3 4 5]]


In [2]:
print(np_a.ndim)

2


In [3]:
print(np_a.shape)

(3, 3)


In [4]:
print(np_a.size)

9


## 创建各种数组

最普通的创建数组：


In [5]:
a = np.array([1, 2, 3, 4, 5])
print(a, a.dtype)

[1 2 3 4 5] int32


### 指定数据类型 dtype

In [6]:
a = np.array([1, 2, 3, 4, 5], dtype=np.int64)
print(a, a.dtype)

[1 2 3 4 5] int64


In [7]:
# Use the builtin ``int`` as the dtype: the ``np.int`` alias was deprecated in
# NumPy 1.20 and removed in 1.24, so ``dtype=np.int`` raises AttributeError on
# current NumPy. ``int`` maps to the platform's default integer type.
a = np.array([1, 2, 3, 4, 5], dtype=int)
print(a, a.dtype)

[1 2 3 4 5] int32


In [8]:
# Use the builtin ``float`` as the dtype: the ``np.float`` alias was deprecated
# in NumPy 1.20 and removed in 1.24. ``float`` maps to float64.
a = np.array([1, 2, 3, 4, 5], dtype=float)
print(a, a.dtype)

[1. 2. 3. 4. 5.] float64


In [9]:
a = np.array([1, 2, 3, 4, 5], dtype=np.float32)
print(a, a.dtype)

[1. 2. 3. 4. 5.] float32


### 全零数组

numpy.zeros()

In [10]:
s = (5, 3)
zeros = np.zeros(s)
print(zeros)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]]


### 全一数组

numpy.ones()

In [11]:
s = (5, 3)
b = np.ones(s, dtype=np.int64)
print(b, b.dtype)

[[1 1 1]
 [1 1 1]
 [1 1 1]
 [1 1 1]
 [1 1 1]] int64


### 全空数组

创建“空”数组：`np.empty()` 只分配内存而不做初始化，元素的值是内存中残留的任意数据（常常看起来接近于零，但并不保证）:

In [12]:
s = (5, 3)
c = np.empty(s)
print(c, c.dtype)

[[0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]
 [0. 0. 0.]] float64


### 连续数组


In [13]:
d = np.arange(100, 201, 10) # 100-200间的数据，步长为10
print(d)
print(d.shape)
print(d.size)

[100 110 120 130 140 150 160 170 180 190 200]
(11,)
11


`np.reshape()` 重塑

In [15]:
e = np.arange(1, 13)
e.reshape((3, 4))

array([[ 1,  2,  3,  4],
       [ 5,  6,  7,  8],
       [ 9, 10, 11, 12]])

`np.linspace`均分区间产生线性矢量

In [17]:
lo = 1
hi = 20
n = 10
f = np.linspace(lo, hi, n)
print(f)
print(f.size)

[ 1.          3.11111111  5.22222222  7.33333333  9.44444444 11.55555556
 13.66666667 15.77777778 17.88888889 20.        ]
10


In [20]:
f.reshape((2, 5))

array([[ 1.        ,  3.11111111,  5.22222222,  7.33333333,  9.44444444],
       [11.55555556, 13.66666667, 15.77777778, 17.88888889, 20.        ]])

`logspace()`

`np.logspace(a, b, n)` 生成一个长度为 n 的一维数组，第一个元素为 10^a，最后一个元素为 10^b，n 是总采样点数。需要注意的是，此时产生的数组元素在 10^a 到 10^b 上并不是均匀分布的，而是指数在 a 到 b 之间均匀取值，形成一条对数曲线。

In [19]:
lo = 1
hi = 4
n = 4
g = np.logspace(lo, hi, n)
print(g)
print(g.size)

[   10.   100.  1000. 10000.]
4


## numpy基础运算



In [24]:
import numpy as np

a = np.arange(100, 901, 100)
b = np.array([[0, 0, 0], [1, 2, 3], [-1, -2, -3]])

print('a=', a)
print('b=', b)

a= [100 200 300 400 500 600 700 800 900]
b= [[ 0  0  0]
 [ 1  2  3]
 [-1 -2 -3]]


In [25]:
c = np.arange(3)
print(c)

[0 1 2]


In [26]:
a+c

ValueError: operands could not be broadcast together with shapes (9,) (3,) 

In [27]:
b+c

array([[ 0,  1,  2],
       [ 1,  3,  5],
       [-1, -1, -1]])

In [28]:
a.reshape((3, 3))+c

array([[100, 201, 302],
       [400, 501, 602],
       [700, 801, 902]])

In [29]:
b**2

array([[0, 0, 0],
       [1, 4, 9],
       [1, 4, 9]], dtype=int32)

In [30]:
2*np.sin(a)-np.pi

array([-4.15432394, -4.88818725, -5.14110433, -4.84343137, -4.07713626,
       -3.05322776, -2.05365161, -1.35365336, -1.1459861 ])

In [31]:
print(a<520)

[ True  True  True  True  True False False False False]


### 矩阵乘法


#### 两矩阵逐个元素相乘


In [32]:
a = np.array([[1, 0, 0], 
             [0, 1, 0],
             [0, 0, 1]])
b = np.array([[1, 2, 3],
             [4, 5, 6], 
             [7, 8, 9]])

print(a*b)

[[1 0 0]
 [0 5 0]
 [0 0 9]]


#### 矩阵相乘

In [33]:
print(np.dot(a, b))

[[1 2 3]
 [4 5 6]
 [7 8 9]]


In [34]:
print(a.dot(b))

[[1 2 3]
 [4 5 6]
 [7 8 9]]


#### sum(), min(), max()

求和，最小值，最大值

In [35]:
a = np.random.random((2, 3))
print(a)

[[0.17834103 0.62398286 0.51941265]
 [0.29553434 0.36061784 0.70611053]]


In [36]:
np.sum(a)

2.683999257330236

In [37]:
np.max(a)

0.7061105309037907

In [38]:
np.min(a)

0.17834103382073974

In [42]:
a = np.array([[0, 2,0],
            [1, 0, 3],
            [0, 0, 0]])
print(a)
print("列求和：", np.sum(a, axis=0))
print("行求和：",np.sum(a, axis=1))

[[0 2 0]
 [1 0 3]
 [0 0 0]]
列求和： [1 2 3]
行求和： [2 4 0]


In [43]:
print("列min：", np.min(a, axis=0))
print("行min：", np.min(a, axis=1))

列min： [0 0 0]
行min： [0 0 0]


In [44]:
print("列max：", np.max(a, axis=0))
print("行max：", np.max(a, axis=1))

列max： [1 2 3]
行max： [2 3 0]


In [46]:
A = np.arange(2, 14).reshape(4, 3)
print(np.argmin(A))
print(np.argmax(A))


0
11


In [47]:
print(np.mean(A))

7.5


In [48]:
A.mean()

7.5

In [49]:
print(np.average(A))

7.5


In [50]:
A.average()

AttributeError: 'numpy.ndarray' object has no attribute 'average'

In [51]:
np.median(A)

7.5

In [52]:
# Cumulative sum: with no axis given, np.cumsum works on the flattened array
# and returns the running totals as a 1-D array.
print(A)
print(np.cumsum(A))

[[ 2  3  4]
 [ 5  6  7]
 [ 8  9 10]
 [11 12 13]]
[ 2  5  9 14 20 27 35 44 54 65 77 90]


In [53]:
print(np.diff(A))

[[1 1]
 [1 1]
 [1 1]
 [1 1]]


In [55]:
print(np.nonzero(A))

(array([0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3], dtype=int64), array([0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2], dtype=int64))


In [56]:
B = np.random.random((3, 4))
print(B)
print(np.sort(B))

[[0.96285893 0.79187328 0.36609067 0.36656789]
 [0.48140849 0.10138784 0.66218463 0.69472287]
 [0.87650184 0.50549467 0.83789424 0.12110094]]
[[0.36609067 0.36656789 0.79187328 0.96285893]
 [0.10138784 0.48140849 0.66218463 0.69472287]
 [0.12110094 0.50549467 0.83789424 0.87650184]]


In [59]:
C = np.linspace(-4, 5, 10).reshape(5, 2)
print(C)
print("转置")
print(np.transpose(C))
print(C.T)

[[-4. -3.]
 [-2. -1.]
 [ 0.  1.]
 [ 2.  3.]
 [ 4.  5.]]
转置
[[-4. -2.  0.  2.  4.]
 [-3. -1.  1.  3.  5.]]
[[-4. -2.  0.  2.  4.]
 [-3. -1.  1.  3.  5.]]


In [60]:
print(np.dot(C.T, C))

[[40. 40.]
 [40. 45.]]


In [62]:
print(np.clip(C, -2, 2))

[[-2. -2.]
 [-2. -1.]
 [ 0.  1.]
 [ 2.  2.]
 [ 2.  2.]]


In [63]:
print(np.mean(C, axis=0))

[0. 1.]


In [64]:
print(np.mean(C, axis=1))

[-3.5 -1.5  0.5  2.5  4.5]


关于`axis=0/1`代表的意义，我个人的记忆方法是：`axis=0`，得到的结果是投影到横轴的，也就是沿着行的方向压缩、对每一列进行计算；`axis=1`，得到的结果是投影到纵轴的，也就是沿着列的方向压缩、对每一行进行计算。

更多操作见[英文官方手册](https://numpy.org/doc/stable/user/quickstart.html)

## Numpy索引


In [65]:
import numpy as np

A = np.arange(3, 15)
print(A)
print(A[2])

[ 3  4  5  6  7  8  9 10 11 12 13 14]
5


In [66]:
A = np.arange(3, 15).reshape(3, 4)
print(A)
print(A[2])

[[ 3  4  5  6]
 [ 7  8  9 10]
 [11 12 13 14]]
[11 12 13 14]


In [67]:
print(A[0][2])

5


In [68]:
print(A[0, 2])

5


In [69]:
print(A[:, 2])

[ 5  9 13]


In [70]:
print(A[0,:])

[3 4 5 6]


In [71]:
print(A[0, 1:2])

[4]


#### 迭代矩阵的行和列

迭代行：


In [72]:
for row in A:
    print(row)

[3 4 5 6]
[ 7  8  9 10]
[11 12 13 14]


In [73]:
for col in A.T:
    print(col)

[ 3  7 11]
[ 4  8 12]
[ 5  9 13]
[ 6 10 14]


#### 迭代元素

In [74]:
print(A.flatten())

[ 3  4  5  6  7  8  9 10 11 12 13 14]


In [76]:
A.flat

<numpy.flatiter at 0x1b97d26aad0>

In [77]:
for e in A.flat:
    print(e)

3
4
5
6
7
8
9
10
11
12
13
14


## Numpy数组合并

In [79]:
import numpy as np
import copy

A = np.ones(6).reshape(2, 3)
print(A)
B = copy.deepcopy(A)*2
print(B)
print(np.vstack((A, B)))

[[1. 1. 1.]
 [1. 1. 1.]]
[[2. 2. 2.]
 [2. 2. 2.]]
[[1. 1. 1.]
 [1. 1. 1.]
 [2. 2. 2.]
 [2. 2. 2.]]


In [80]:
print(np.hstack((A, B)))

[[1. 1. 1. 2. 2. 2.]
 [1. 1. 1. 2. 2. 2.]]


#### `np.newaxis`

说完了array的合并，我们稍稍提及一下前一节中的转置操作。如果面对的是一维序列A，转置操作不会产生任何效果（因为一维数组没有行、列之分，`A.T`仍然是它本身），此时就需要借助`np.newaxis`先增加一个维度，把它变成行向量或列向量：

In [100]:
A = np.ones(4)
print(A)

[1. 1. 1. 1.]


In [101]:
print(A.T)

[1. 1. 1. 1.]


In [102]:
print(A[np.newaxis, :])

[[1. 1. 1. 1.]]


In [103]:
print(A[:, np.newaxis])

[[1.]
 [1.]
 [1.]
 [1.]]


In [104]:
print(np.concatenate((A, A, A, A)))

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [109]:
A = A[:,np.newaxis]
B = B[:,np.newaxis]

In [110]:
print(np.concatenate((A, A, A), axis = 0))

[[1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]
 [1.]]


In [112]:
# print(A)
print(np.concatenate((A, A, A), axis = 1))

[[1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]
 [1. 1. 1.]]


## Numpy分割

In [114]:
import numpy as np

A = np.arange(2, 14).reshape(3, 4)
print(A)

[[ 2  3  4  5]
 [ 6  7  8  9]
 [10 11 12 13]]


纵向分割

In [118]:
print(np.split(A, 2, axis=1))

[array([[ 2,  3],
       [ 6,  7],
       [10, 11]]), array([[ 4,  5],
       [ 8,  9],
       [12, 13]])]


In [120]:
print(np.split(A, 3, axis=0))

[array([[2, 3, 4, 5]]), array([[6, 7, 8, 9]]), array([[10, 11, 12, 13]])]


### 不等量分割

`np.split()`按份数分割时只能对数组进行等量分割（不能整除时会报错）；
`np.array_split()`可以对数组进行非等量分割。

In [121]:
print(np.array_split(A, 3))

[array([[2, 3, 4, 5]]), array([[6, 7, 8, 9]]), array([[10, 11, 12, 13]])]


In [122]:
print(np.array_split(A, 3, axis=1))

[array([[ 2,  3],
       [ 6,  7],
       [10, 11]]), array([[ 4],
       [ 8],
       [12]]), array([[ 5],
       [ 9],
       [13]])]


In [123]:
print(np.vsplit(A, 3))

[array([[2, 3, 4, 5]]), array([[6, 7, 8, 9]]), array([[10, 11, 12, 13]])]


In [125]:
print(np.hsplit(A, 2))

[array([[ 2,  3],
       [ 6,  7],
       [10, 11]]), array([[ 4,  5],
       [ 8,  9],
       [12, 13]])]


## Numpy拷贝，深度拷贝

In [126]:
import numpy as np 

A = np.arange(6)
B = A
print(A)
B[0] = 110
print(A)

[0 1 2 3 4 5]
[110   1   2   3   4   5]


In [127]:
C = np.copy(A)
C[0] = 222
print(A)
print(C)

[110   1   2   3   4   5]
[222   1   2   3   4   5]


In [128]:
# The input is ragged (a list next to scalars), so it can only be stored as an
# array of Python objects. dtype=object must be given explicitly: NumPy >= 1.24
# raises ValueError when asked to build a ragged array implicitly.
A = np.array([[1, 2, 1], 2, 3], dtype=object)
print(A)

[list([1, 2, 1]) 2 3]


In [129]:
print(type(A))

<class 'numpy.ndarray'>


In [130]:
B = np.copy(A)
print(A[0]==B[0])

True


In [132]:
print(id(A[0])==id(B[0]))

True


In [133]:
print(A)
A[2] = 333
print(A)
print(B)


[list([1, 2, 1]) 2 3]
[list([1, 2, 1]) 2 333]
[list([1, 2, 1]) 2 3]


In [134]:
B[0][0] = 666
print(A)

[list([666, 2, 1]) 2 333]


In [135]:
C = A.copy()
print(C)

[list([666, 2, 1]) 2 333]


In [136]:
A[0][0] = 999
print(C)

[list([999, 2, 1]) 2 333]


[为什么用 Numpy 还是慢, 你用对了吗?](https://morvanzhou.github.io/tutorials/data-manipulation/np-pd/4-1-speed-up-numpy/)

[Getting the Best Performance out of NumPy](http://ipython-books.github.io/featured-01/)