# Numpy

*解决 Python 数值计算速度慢的问题*

* ndarray
* 多维操作
* 线性代数

### ndarray 和 list 的区别
* numpy array 中的元素必须是同一种类型
* numpy array 能对整个数组进行运算

In [2]:
height = [1.73, 1.68, 1.71, 1.89, 1.79]
weight = [65.4, 59.2, 63.6, 88.4, 68.7]

In [3]:
import numpy as np
np_height = np.array(height)
np_weight = np.array(weight)
np_height, np_weight

(array([1.73, 1.68, 1.71, 1.89, 1.79]), array([65.4, 59.2, 63.6, 88.4, 68.7]))

#### 能对整个数组运算

In [4]:
bmi = np_weight / np_height ** 2
bmi

array([21.85171573, 20.97505669, 21.75028214, 24.7473475 , 21.44127836])

#### 可以用 bool 类型的数组作为索引来筛选出数组的子集（布尔索引）

In [5]:
bmi > 23

array([False, False, False,  True, False])

In [6]:
bmi[bmi > 23]

array([24.7473475])

#### 常用属性

In [7]:
bmi.shape    # 形状（每个维度的长度）

(5,)

In [8]:
bmi.ndim     # 维度数（轴的个数）

1

In [9]:
bmi.dtype    # 数据类型

dtype('float64')

In [10]:
bmi.itemsize     # 每个元素的大小（字节）

8

In [11]:
bmi.size     # 包含的元素总数

5

#### 常用数组

* **全零数组，全一数组**

In [12]:
np.zeros([2, 4])

array([[0., 0., 0., 0.],
       [0., 0., 0., 0.]])

In [13]:
np.zeros([2, 4, 5])

array([[[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]]])

In [14]:
np.ones([3, 5], dtype=int)

array([[1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1],
       [1, 1, 1, 1, 1]])

* **随机数组**

In [15]:
np.random.rand(2, 4)   # 两行四列、在 [0, 1) 上服从均匀分布的随机数

array([[0.25698695, 0.42685509, 0.61662815, 0.44260099],
       [0.70909938, 0.282724  , 0.58679363, 0.59352588]])

In [16]:
np.random.rand()

0.8954136866702715

**********************************************************************

In [17]:
np.random.randint(3)   # [0, 3)之间的一个随机整数

0

In [18]:
np.random.randint(1, 10)   # [1, 10)之间的一个随机整数

4

In [19]:
np.random.randint(1, 10, 4)    # [1, 10)之间的 4 个随机整数组成的数组

array([8, 9, 8, 4])

In [20]:
np.random.randint(1, 10, (3, 4))   # [1, 10)之间的随机整数组成的3行4列数组

array([[9, 4, 4, 4],
       [4, 4, 1, 7],
       [4, 4, 5, 8]])

In [21]:
np.random.randint(10, size=(3, 4))   # [0, 10)之间的随机整数组成的3行4列数组

array([[3, 3, 4, 5],
       [3, 0, 4, 2],
       [4, 9, 9, 6]])

---------------------------

In [22]:
np.random.randn()    # 标准正态分布的随机数

0.3597246176081234

In [23]:
np.random.randn(2, 4)   # 指定形状：2 行 4 列的标准正态分布随机数

array([[-1.34362888, -0.21765051, -0.89323944, -1.3486793 ],
       [-1.43865797, -0.42619041,  0.47575093,  0.91217559]])

----------------

In [24]:
np.random.choice([1, 4, 6, 10, 8, 9])    # 从给定序列中随机选择一个元素

1

--------------

In [25]:
np.random.beta(1, 10, 100)     # 从参数 a=1、b=10 的 beta 分布中抽取 100 个样本

array([0.11269406, 0.04256631, 0.00993573, 0.15314275, 0.01745378,
       0.04303003, 0.01062698, 0.00624308, 0.14006936, 0.08458487,
       0.14325754, 0.0215947 , 0.11457807, 0.01420411, 0.04194834,
       0.0407996 , 0.02133028, 0.05887166, 0.05446063, 0.0955201 ,
       0.17865126, 0.00501545, 0.33190256, 0.05884824, 0.06856212,
       0.06692322, 0.07685278, 0.00576109, 0.07827689, 0.00155541,
       0.03020704, 0.05330015, 0.11596962, 0.09006773, 0.26812507,
       0.04881254, 0.00652213, 0.02762145, 0.02213979, 0.08606893,
       0.07541994, 0.05074976, 0.07692247, 0.02538226, 0.03511911,
       0.14452148, 0.01698723, 0.03734035, 0.0658981 , 0.01663664,
       0.00356962, 0.0265244 , 0.03801085, 0.04547903, 0.16025337,
       0.12020384, 0.03499691, 0.11967275, 0.00650262, 0.08186373,
       0.03472168, 0.0354114 , 0.21984752, 0.00079284, 0.0360087 ,
       0.14960862, 0.0291025 , 0.16629636, 0.13016608, 0.18527181,
       0.01092162, 0.12752147, 0.0674716 , 0.11901375, 0.16092...,
       ...])

（以上输出在导出时被截断，其余元素省略）

In [26]:
# 还有很多其他的数学分布

#### 常用操作

In [27]:
np.arange(1, 11)

array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [28]:
np.arange(1, 11).reshape(2, 5)

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

In [29]:
lst = np.arange(1, 11).reshape(2, -1)
lst

array([[ 1,  2,  3,  4,  5],
       [ 6,  7,  8,  9, 10]])

----

##### 逐元素运算（通用函数 ufunc）

In [30]:
np.exp(lst)

array([[2.71828183e+00, 7.38905610e+00, 2.00855369e+01, 5.45981500e+01,
        1.48413159e+02],
       [4.03428793e+02, 1.09663316e+03, 2.98095799e+03, 8.10308393e+03,
        2.20264658e+04]])

In [31]:
np.exp2(lst)

array([[   2.,    4.,    8.,   16.,   32.],
       [  64.,  128.,  256.,  512., 1024.]])

In [32]:
np.sqrt(lst)

array([[1.        , 1.41421356, 1.73205081, 2.        , 2.23606798],
       [2.44948974, 2.64575131, 2.82842712, 3.        , 3.16227766]])

In [33]:
np.sin(lst)

array([[ 0.84147098,  0.90929743,  0.14112001, -0.7568025 , -0.95892427],
       [-0.2794155 ,  0.6569866 ,  0.98935825,  0.41211849, -0.54402111]])

In [34]:
np.log(lst)

array([[0.        , 0.69314718, 1.09861229, 1.38629436, 1.60943791],
       [1.79175947, 1.94591015, 2.07944154, 2.19722458, 2.30258509]])

In [35]:
np.log10(lst)

array([[0.        , 0.30103   , 0.47712125, 0.60205999, 0.69897   ],
       [0.77815125, 0.84509804, 0.90308999, 0.95424251, 1.        ]])

In [36]:
lst = np.arange(1, 25).reshape(3, 2, 4)
lst

array([[[ 1,  2,  3,  4],
        [ 5,  6,  7,  8]],

       [[ 9, 10, 11, 12],
        [13, 14, 15, 16]],

       [[17, 18, 19, 20],
        [21, 22, 23, 24]]])

In [37]:
lst.sum(), np.sum(lst), sum(lst)

(300, 300, array([[27, 30, 33, 36],
        [39, 42, 45, 48]]))

In [38]:
# 沿指定的轴（axis）求和
print(lst.sum(axis=0),  # 等同于sum(lst)
lst.sum(axis=1),
lst.sum(axis=2), sep='\n\n')

[[27 30 33 36]
 [39 42 45 48]]

[[ 6  8 10 12]
 [22 24 26 28]
 [38 40 42 44]]

[[10 26]
 [42 58]
 [74 90]]


In [39]:
lst.max(), lst.max(axis=1)

(24, array([[ 5,  6,  7,  8],
        [13, 14, 15, 16],
        [21, 22, 23, 24]]))

### 广播

In [41]:
a = np.array([[1, 2, 3],
            [4, 5, 6],
            [7, 8, 9],
            [10, 11, 12]])

In [42]:
b = np.array([1, 2, 3])

In [43]:
a + b

array([[ 2,  4,  6],
       [ 5,  7,  9],
       [ 8, 10, 12],
       [11, 13, 15]])