In [161]:
"""
Numpy: 处理多维数组(矩阵)
"""

In [1]:
import numpy as np
import matplotlib.pyplot as plt

In [2]:
# Create a 2x3 float array (2 rows, each a 1-D array of 3 values) sampled from the
# standard normal distribution. Note: randn() is unbounded (values may be negative);
# it is rand() that is fixed to the interval [0.0, 1.0).
arr = np.random.randn(2, 3)

In [3]:
arr  # display the array

array([[-0.23107475, -1.69753539, -1.45841257],
       [ 1.98218416,  0.0403811 ,  0.8691001 ]])

In [4]:
type(arr)  # Python type of the array object

numpy.ndarray

In [5]:
arr.ndim  # number of dimensions (1-D, 2-D, 3-D, ...)

2

In [6]:
arr.shape  # size along each dimension (m rows, n columns)

(2, 3)

In [7]:
arr.dtype  # data type of the array's elements

dtype('float64')

In [8]:
# Create a 3x4 float array of uniformly distributed random values;
# uniform() lets you choose the interval, here [low, high) = [-10.0, 10.0).
arr = np.random.uniform(low=-10.0, high=10.0, size=(3, 4)) 

In [9]:
arr  # display the array

array([[-7.02118309, -7.94222991,  5.31487634,  2.36018068],
       [ 6.20745347,  2.56180626, -0.31042087,  8.70358744],
       [ 9.43954384,  2.45998553, -8.86767846,  4.10682653]])

In [10]:
type(arr), arr.ndim, arr.shape, arr.dtype  

(numpy.ndarray, 2, (3, 4), dtype('float64'))

In [11]:
# astype() converts the element type. Converting float64 to an integer type truncates
# toward zero instead of rounding, e.g. 5.69063769 --> 5 and -3.80322353 --> -3.
# The builtin `int` selects NumPy's platform-default integer dtype (int32 in the
# output recorded below; it may be int64 on other platforms/versions).
arr1 = arr.astype(dtype=int)

In [12]:
arr1

array([[-7, -7,  5,  2],
       [ 6,  2,  0,  8],
       [ 9,  2, -8,  4]])

In [13]:
arr1.dtype 

dtype('int32')

In [14]:
# Create a 3x4 array of random integers; randint() draws from the half-open
# interval [low, high), so here the values range over 1..9.
arr = np.random.randint(low=1, high=10, size=(3, 4))

In [15]:
arr  # display the array

array([[2, 5, 5, 8],
       [7, 5, 8, 3],
       [9, 2, 9, 4]])

In [16]:
type(arr), arr.ndim, arr.shape, arr.dtype  

(numpy.ndarray, 2, (3, 4), dtype('int32'))

In [17]:
# Draw 10000 random samples from the standard normal distribution (1-D array).
arr = np.random.randn(10000)  

In [18]:
arr  # display the array

array([ 0.92876013,  0.60780565,  1.30864805, ...,  2.41872624,
        1.51009263, -0.7882928 ])

In [19]:
type(arr), arr.ndim, arr.shape, arr.dtype

(numpy.ndarray, 1, (10000,), dtype('float64'))

In [20]:
# Predicate functions: any() is True if at least one element satisfies the condition,
# all() is True only if every element does.
np.any(arr > 0), np.all(arr > 0)

(True, False)

In [21]:
plt.hist(arr, bins=10)  # bins is the number of bars (equal-width intervals) in the histogram

(array([  56.,  243.,  727., 1775., 2557., 2389., 1473.,  606.,  150.,
          24.]),
 array([-3.30392614, -2.62129836, -1.93867058, -1.2560428 , -0.57341502,
         0.10921276,  0.79184055,  1.47446833,  2.15709611,  2.83972389,
         3.52235167]),
 <a list of 10 Patch objects>)

In [23]:
plt.show()

In [24]:
# Build a 2-D array (matrix) from a list of equal-length sequences (here, range objects).
np.array([range(1, 5), range(6, 10)])

array([[1, 2, 3, 4],
       [6, 7, 8, 9]])

In [25]:
# Create an array in which every element is 0.
np.zeros(shape=(2, 3), dtype=float)

array([[0., 0., 0.],
       [0., 0., 0.]])

In [26]:
# Create an array in which every element is 1.
np.ones(shape=(2, 3), dtype=int)

array([[1, 1, 1],
       [1, 1, 1]])

In [27]:
# Create a 1-D array over a given range (stop is exclusive), similar to Python's range().
np.arange(start=1, stop=10, step=2)

array([1, 3, 5, 7, 9])

In [28]:
# Rearrange an array into a new shape: the 15-element 1-D array becomes a 2-D array
# made of 3 rows with 5 elements each.
# The arguments are passed positionally because the `newshape` keyword of np.reshape
# is deprecated as of NumPy 2.1 (renamed to `shape`); the positional form works on
# every NumPy version.
arr1 = np.reshape(np.arange(15), (3, 5))

In [29]:
arr1

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [30]:
# Rearrange the 15-element 1-D array into a 3-D array: 3 two-dimensional arrays,
# each holding 1 row of 5 elements.
# The arguments are passed positionally because the `newshape` keyword of np.reshape
# is deprecated as of NumPy 2.1 (renamed to `shape`); the positional form works on
# every NumPy version.
arr2 = np.reshape(np.arange(15), (3, 1, 5))

In [31]:
arr2

array([[[ 0,  1,  2,  3,  4]],

       [[ 5,  6,  7,  8,  9]],

       [[10, 11, 12, 13, 14]]])

In [32]:
arr2.flatten()  # collapse a multi-dimensional array into a 1-D array

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [33]:
arr = np.random.randint(low=1, high=50, size=(3, 4))

In [34]:
arr

array([[43, 15,  2,  6],
       [16, 44, 23, 43],
       [14,  7, 15, 21]])

In [35]:
arr.reshape(4, 3)  # reshape() keeps the original element order and only re-partitions it into rows and columns

array([[43, 15,  2],
       [ 6, 16, 44],
       [23, 43, 14],
       [ 7, 15, 21]])

In [36]:
arr.transpose()  # transpose() swaps rows and columns: shape (3, 4) --> (4, 3)

array([[43, 16, 14],
       [15, 44,  7],
       [ 2, 23, 15],
       [ 6, 43, 21]])

In [37]:
arr1 = np.random.randint(low=1, high=20, size=(2, 3, 4))

In [38]:
arr1

array([[[ 1, 12,  7, 17],
        [ 4,  5, 10, 18],
        [11, 11,  4,  2]],

       [[14, 15, 11, 17],
        [ 8, 17, 10,  9],
        [15,  4,  8, 12]]])

In [39]:
arr1.transpose()  # with no arguments the axes of an N-D array are reversed: shape (2, 3, 4) --> (4, 3, 2)

array([[[ 1, 14],
        [ 4,  8],
        [11, 15]],

       [[12, 15],
        [ 5, 17],
        [11,  4]],

       [[ 7, 11],
        [10, 10],
        [ 4,  8]],

       [[17, 17],
        [18,  9],
        [ 2, 12]]])

In [40]:
# Reordering the axes of an N-D array: the arguments are indices of the ORIGINAL axes.
# Sizes 2, 3, 4 sit at axes 0, 1, 2, so (1, 2, 0) yields shape (3, 4, 2).
arr1.transpose(1, 2, 0)

array([[[ 1, 14],
        [12, 15],
        [ 7, 11],
        [17, 17]],

       [[ 4,  8],
        [ 5, 17],
        [10, 10],
        [18,  9]],

       [[11, 15],
        [11,  4],
        [ 4,  8],
        [ 2, 12]]])

In [41]:
# Shuffling an array: start from the ordered values 0..14.
arr = np.arange(15)

In [42]:
arr

array([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14])

In [43]:
np.random.shuffle(arr)  # note: shuffle() reorders the array in place and returns None, not a new array

In [44]:
arr

array([ 2, 14,  8,  3,  1,  9,  4,  7,  5,  6, 13, 10,  0, 11, 12])

In [45]:
# Regroup the shuffled 15-element array into 3 rows of 5 elements.
# The shape is passed positionally because the `newshape` keyword of np.reshape
# is deprecated as of NumPy 2.1 (renamed to `shape`).
arr1 = np.reshape(arr, (3, 5))

In [46]:
arr1

array([[ 2, 14,  8,  3,  1],
       [ 9,  4,  7,  5,  6],
       [13, 10,  0, 11, 12]])

In [47]:
arr = np.array([range(1, 5), range(3, 7)])  # note: every inner sequence must have the same length

In [48]:
arr

array([[1, 2, 3, 4],
       [3, 4, 5, 6]])

In [49]:
np.unique(arr)  # returns the distinct values, sorted in ascending order

array([1, 2, 3, 4, 5, 6])

In [50]:
# 1. Slicing and indexing a 1-D array
arr1 = np.arange(10)

In [51]:
arr1, arr1[3:8]

(array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]), array([3, 4, 5, 6, 7]))

In [52]:
# 2. Slicing and indexing a multi-dimensional array
arr2 = np.arange(15).reshape(3, 5)

In [53]:
arr2

array([[ 0,  1,  2,  3,  4],
       [ 5,  6,  7,  8,  9],
       [10, 11, 12, 13, 14]])

In [54]:
arr2[1]  # select one row (a 1-D array) by index

array([5, 6, 7, 8, 9])

In [55]:
arr2[1][1:4]  # elements 1..3 (stop index 4 is exclusive) of the selected row

array([6, 7, 8])

In [56]:
arr2[:, 1:4]  # columns 1..3 (stop index 4 is exclusive) of every row

array([[ 1,  2,  3],
       [ 6,  7,  8],
       [11, 12, 13]])

In [57]:
# 3. Conditional (boolean-mask) indexing
year_arr = np.array([[2008, 2009, 2010], [2011, 2012, 2013], [2014, 2015, 2016]])

In [58]:
year_arr

array([[2008, 2009, 2010],
       [2011, 2012, 2013],
       [2014, 2015, 2016]])

In [59]:
year_arr >= 2011

array([[False, False, False],
       [ True,  True,  True],
       [ True,  True,  True]])

In [60]:
(year_arr >= 2011) & (year_arr % 2 == 0)

array([[False, False, False],
       [False,  True, False],
       [ True, False,  True]])

In [61]:
data_arr = np.random.randint(low=10, high=20, size=(3, 3))

In [62]:
data_arr

array([[19, 18, 18],
       [17, 15, 10],
       [19, 18, 11]])

In [63]:
# Returns a 1-D array of the data_arr elements at positions where the mask is True;
# positions where it is False are dropped.
data_arr[year_arr >= 2011]

array([17, 15, 10, 19, 18, 11])

In [64]:
data_arr[(year_arr >= 2011) & (year_arr % 2 == 0)]

array([15, 19, 11])

In [65]:
# Create a 2-D array
arr = np.array([range(1, 5), range(6, 10)])

In [66]:
arr

array([[1, 2, 3, 4],
       [6, 7, 8, 9]])

In [67]:
# Array-with-array arithmetic: every operator works element by element
# (`*` is an element-wise product, not matrix multiplication; `/` yields floats).
arr + arr, arr - arr, arr * arr, arr / arr

(array([[ 2,  4,  6,  8],
        [12, 14, 16, 18]]), array([[0, 0, 0, 0],
        [0, 0, 0, 0]]), array([[ 1,  4,  9, 16],
        [36, 49, 64, 81]]), array([[1., 1., 1., 1.],
        [1., 1., 1., 1.]]))

In [68]:
# Broadcasting: the scalar is applied to every element of the array.
# An int scalar keeps the integer dtype; a float scalar promotes the result to float.
arr + 10, arr + 10., arr * 1, arr * 1.

(array([[11, 12, 13, 14],
        [16, 17, 18, 19]]), array([[11., 12., 13., 14.],
        [16., 17., 18., 19.]]), array([[1, 2, 3, 4],
        [6, 7, 8, 9]]), array([[1., 2., 3., 4.],
        [6., 7., 8., 9.]]))

In [69]:
arr = np.random.uniform(low=-5, high=5, size=(2, 3))

In [70]:
arr

array([[-1.67442301,  1.80811315,  0.39078687],
       [-3.11085739,  0.51083426, -3.62328397]])

In [71]:
np.ceil(arr)  # round up (toward +infinity)

array([[-1.,  2.,  1.],
       [-3.,  1., -3.]])

In [72]:
np.floor(arr)  # round down (toward -infinity)

array([[-2.,  1.,  0.],
       [-4.,  0., -4.]])

In [73]:
np.abs(arr)  # element-wise absolute value

array([[1.67442301, 1.80811315, 0.39078687],
       [3.11085739, 0.51083426, 3.62328397]])

In [74]:
np.rint(arr)  # round to the nearest integer (exact .5 ties go to the nearest even value, not always up)

array([[-2.,  2.,  0.],
       [-3.,  1., -4.]])

In [75]:
np.isnan(arr)  # element-wise test for NaN (not a number)

array([[False, False, False],
       [False, False, False]])

In [76]:
np.multiply(arr, 10)  # element-wise multiplication

array([[-16.74423013,  18.08113146,   3.9078687 ],
       [-31.10857386,   5.10834258, -36.23283968]])

In [77]:
np.divide(arr, 10)  # element-wise division

array([[-0.1674423 ,  0.18081131,  0.03907869],
       [-0.31108574,  0.05108343, -0.3623284 ]])

In [78]:
np.mean(arr)  # mean of all elements

-0.9498050155073807

In [79]:
np.mean(arr[1][1:3])  # mean of a selected slice (row 1, elements 1..2)

-1.5562248549583768

In [80]:
np.sum(arr)  # sum of all elements

-5.698830093044284

In [81]:
np.sum(arr, axis=0)  # sum down each column (one total per column)

array([-4.7852804,  2.3189474, -3.2324971])

In [82]:
np.sum(arr, axis=1)  # sum across each row (one total per row)

array([ 0.524477 , -6.2233071])

In [83]:
np.max(arr)  # maximum over all elements

1.8081131455177886

In [84]:
np.min(arr)  # minimum over all elements

-3.6232839678753646

In [85]:
np.std(arr)  # standard deviation: square root of the variance (i.e. of the mean squared deviation from the mean)

1.9949717864242764

In [86]:
np.var(arr)  # variance: mean of the squared deviations of all values from their mean

3.9799124286288685

In [87]:
np.argmax(arr)  # index of the maximum in the flattened array (first occurrence if the value repeats)

1

In [88]:
np.argmin(arr)  # index of the minimum in the flattened array

5

In [89]:
np.cumsum(arr)  # returns a 1-D array: each entry is the running sum of the flattened array up to and including that element

In [90]:
np.cumsum(arr).reshape(2, 3)

array([[-1.67442301,  0.13369013,  0.524477  ],
       [-2.58638038, -2.07554613, -5.69883009]])

In [91]:
np.cumprod(arr)  # returns a 1-D array: each entry is the running product of the flattened array up to and including that element

array([-1.67442301, -3.02754626, -1.18312533,  3.68053416,  1.88014294,
       -6.81229177])

In [92]:
np.cumprod(arr, axis=0)  # cumulative product down each column (result keeps the 2-D shape)

array([[-1.67442301,  1.80811315,  0.39078687],
       [ 5.2088912 ,  0.92364614, -1.4159318 ]])