# 改变数据形状

## 1 索引切片

In [2]:
import numpy as np

# Build a 5x6 integer array with values drawn from [-5, 15) (upper bound exclusive).
# NOTE(review): no random seed is set, so the values shown below change on every run.
nd2 = np.random.randint(-5, 15, size=(5, 6))
nd2

array([[-3,  6,  2,  7, 10,  2],
       [ 1,  1,  2,  6,  7, 11],
       [ 1, -1,  8,  1,  6,  5],
       [-5,  8, 13,  6,  7,  3],
       [-4, -4, -3,  7,  2, 14]])

In [3]:
# Integer-array (fancy) indexing on axis 0: picks rows 1 and 3, i.e. the 2nd and 4th rows.
nd2[[1, 3]]

array([[ 1,  1,  2,  6,  7, 11],
       [-5,  8, 13,  6,  7,  3]])

In [4]:
# Keep every row and pick columns 1, 3 and 5 (the 2nd, 4th and 6th columns).
nd2[:, [1, 3, 5]]

array([[ 6,  7,  2],
       [ 1,  6, 11],
       [-1,  1,  5],
       [ 8,  6,  3],
       [-4,  7, 14]])

In [5]:
# Rows 2 and 4 first, then columns 2, 4 and 6 of that result (1-based positions).
# Two chained selections are used because nd2[[1, 3], [1, 3, 5]] would try to pair
# the two index lists element-wise and fail: their lengths (2 and 3) do not match.
nd2[[1, 3]][:, [1, 3, 5]]

array([[ 1,  6, 11],
       [ 8,  6,  3]])

In [6]:
# np.ix_ combines the row list and the column list into an open mesh, so a single
# indexing step selects the 2x3 sub-array at rows (1, 3) x columns (1, 3, 5).
nd2[np.ix_([1, 3], [1, 3, 5])]  # equivalent to the chained indexing in the previous cell

array([[ 1,  6, 11],
       [ 8,  6,  3]])

## 2 形状改变

In [7]:
# Rebind nd2 to a fresh random array of integers in [0, 100); the reshape cells below use it.
nd2 = np.random.randint(0, 100, size=(3, 4))  # 3 rows, 4 columns
display(nd2)

array([[ 3,  5, 98, 62],
       [17,  0, 54, 74],
       [ 4, 36, 98, 65]])

In [8]:
# The same 12 elements are re-read in row-major order into the new shape.
nd2.reshape(4, 3)  # reshape to 4 rows x 3 columns

array([[ 3,  5, 98],
       [62, 17,  0],
       [54, 74,  4],
       [36, 98, 65]])

In [9]:
nd2.reshape(2, 6)  # reshape to 2 rows x 6 columns

array([[ 3,  5, 98, 62, 17,  0],
       [54, 74,  4, 36, 98, 65]])

In [10]:
nd2.reshape(-1, 6)  # -1 is a placeholder: NumPy infers that dimension (12 elements / 6 columns = 2 rows)

array([[ 3,  5, 98, 62, 17,  0],
       [54, 74,  4, 36, 98, 65]])

In [11]:
nd2.reshape(-1)  # a lone -1 flattens the array to 1-D; the length (12) is inferred

array([ 3,  5, 98, 62, 17,  0, 54, 74,  4, 36, 98, 65])

### 2.2 数据叠加

In [12]:
# Two arrays with the same number of columns (4) but different row counts (2 and 3),
# so they can be joined along axis 0.
arr1 = np.random.randint(0, 100, size=(2, 4))
arr2 = np.random.randint(0, 100, size=(3, 4))
display(arr1, arr2)

array([[79, 63,  9, 38],
       [34, 72, 22, 23]])

array([[84, 81, 18, 81],
       [92,  0, 10, 23],
       [ 0, 83, 34,  3]])

In [13]:
np.concatenate((arr1, arr2, arr2), )  # default axis=0: the arrays are stacked row-wise (2 + 3 + 3 = 8 rows)

array([[79, 63,  9, 38],
       [34, 72, 22, 23],
       [84, 81, 18, 81],
       [92,  0, 10, 23],
       [ 0, 83, 34,  3],
       [84, 81, 18, 81],
       [92,  0, 10, 23],
       [ 0, 83, 34,  3]])

In [14]:
# Two arrays with the same number of rows (3) but different column counts (4 and 5),
# so they can be joined along axis 1.
arr1 = np.random.randint(0, 100, size=(3, 4))
arr2 = np.random.randint(0, 100, size=(3, 5))
display(arr1, arr2)

array([[ 9, 33, 37, 92],
       [92, 52, 74, 32],
       [31, 67, 38, 39]])

array([[43, 83, 28, 96,  5],
       [52, 11, 12, 78,  3],
       [31, 91, 13, 47, 43]])

In [15]:
np.concatenate((arr1, arr2), axis=-1)  # axis=-1 means the last dimension; for 2-D arrays that is axis=1, so columns are joined

array([[ 9, 33, 37, 92, 43, 83, 28, 96,  5],
       [92, 52, 74, 32, 52, 11, 12, 78,  3],
       [31, 67, 38, 39, 31, 91, 13, 47, 43]])

In [16]:
np.concatenate((arr1, arr2), axis=1)  # axis=1 joins along columns; `axis` names the dimension to join along

array([[ 9, 33, 37, 92, 43, 83, 28, 96,  5],
       [92, 52, 74, 32, 52, 11, 12, 78,  3],
       [31, 67, 38, 39, 31, 91, 13, 47, 43]])

### 2.3 数据拆分

In [17]:
# 6x9 random array reused by the split examples below.
nd = np.random.randint(0, 100, size=(6, 9))
display(nd)
np.split(nd, 2)  # integer argument: split into 2 equal parts; the default axis is 0 (rows)

array([[86, 85, 37,  1, 65, 50, 39, 77, 16],
       [67, 85,  2, 53, 93, 42, 22, 42, 15],
       [88, 11,  7,  8, 31, 94, 91, 90, 98],
       [13, 61, 87, 79, 80, 61, 36, 65, 91],
       [19, 26, 89,  7,  3, 72, 53, 23, 39],
       [21, 99, 81, 86, 62, 84, 69, 87, 77]])

[array([[86, 85, 37,  1, 65, 50, 39, 77, 16],
        [67, 85,  2, 53, 93, 42, 22, 42, 15],
        [88, 11,  7,  8, 31, 94, 91, 90, 98]]),
 array([[13, 61, 87, 79, 80, 61, 36, 65, 91],
        [19, 26, 89,  7,  3, 72, 53, 23, 39],
        [21, 99, 81, 86, 62, 84, 69, 87, 77]])]

In [18]:
display(nd)
np.split(nd, 3, axis=-1)  # split along the last axis (columns) into 3 equal 6x3 blocks


array([[86, 85, 37,  1, 65, 50, 39, 77, 16],
       [67, 85,  2, 53, 93, 42, 22, 42, 15],
       [88, 11,  7,  8, 31, 94, 91, 90, 98],
       [13, 61, 87, 79, 80, 61, 36, 65, 91],
       [19, 26, 89,  7,  3, 72, 53, 23, 39],
       [21, 99, 81, 86, 62, 84, 69, 87, 77]])

[array([[86, 85, 37],
        [67, 85,  2],
        [88, 11,  7],
        [13, 61, 87],
        [19, 26, 89],
        [21, 99, 81]]),
 array([[ 1, 65, 50],
        [53, 93, 42],
        [ 8, 31, 94],
        [79, 80, 61],
        [ 7,  3, 72],
        [86, 62, 84]]),
 array([[39, 77, 16],
        [22, 42, 15],
        [91, 90, 98],
        [36, 65, 91],
        [53, 23, 39],
        [69, 87, 77]])]

In [19]:
# A list argument gives the cut positions: the column slices are [:1], [1:4], [4:5] and [5:].
np.split(nd, [1, 4, 5], axis=1)  # split at the given column indices

[array([[86],
        [67],
        [88],
        [13],
        [19],
        [21]]),
 array([[85, 37,  1],
        [85,  2, 53],
        [11,  7,  8],
        [61, 87, 79],
        [26, 89,  7],
        [99, 81, 86]]),
 array([[65],
        [93],
        [31],
        [80],
        [ 3],
        [62]]),
 array([[50, 39, 77, 16],
        [42, 22, 42, 15],
        [94, 91, 90, 98],
        [61, 36, 65, 91],
        [72, 53, 23, 39],
        [84, 69, 87, 77]])]

### 2.4 数组转置

In [20]:
# 3x4 random matrix used to contrast reshape with transpose in the next cells.
A = np.random.randint(0, 100, size=(3, 4))
display(A)

array([[44, 77, 40, 10],
       [85, 10,  4, 86],
       [95, 89,  3, 42]])

In [21]:
# reshape keeps the row-major element order, so this 4x3 result is NOT the transpose of A.
A.reshape(4, 3)

array([[44, 77, 40],
       [10, 85, 10],
       [ 4, 86, 95],
       [89,  3, 42]])

In [22]:
# Transpose: rows become columns, giving a 4x3 array.
A.T

array([[44, 85, 95],
       [77, 10, 89],
       [40,  4,  3],
       [10, 86, 42]])

In [23]:
# axes=[1, 0] swaps axis 0 and axis 1; for a 2-D array this gives the same result as A.T.
np.transpose(A, axes=[1, 0])

array([[44, 85, 95],
       [77, 10, 89],
       [40,  4,  3],
       [10, 86, 42]])

## 3 广播机制


In [24]:
# arr1 has shape (5, 3); arr2 is the 1-D array [1, 2, 3] with shape (3,).
arr1 = np.random.randint(0, 100, size=(5, 3))
arr2 = np.arange(1, 4)
display(arr1, arr2)

array([[79, 90, 21],
       [29,  5, 77],
       [84, 73, 26],
       [55, 81,  2],
       [69,  4, 29]])

array([1, 2, 3])

In [25]:
# Broadcasting: arr2 (shape (3,)) is added to each of the 5 rows of arr1.
arr1 + arr2

array([[80, 92, 24],
       [30,  7, 80],
       [85, 75, 29],
       [56, 83,  5],
       [70,  6, 32]])

In [26]:
arr3 = np.random.randint(0, 10, size=(4, 5))
# arr4 holds the mean of each row of arr3
arr4 = arr3.mean(axis=1)

# At this point arr4 is 1-D with shape (4,), not yet 4 rows x 1 column
display(arr3, arr4)

# Reshape to 4 rows x 1 column
display(arr4.reshape(4, 1))

# Broadcasting: the (4, 1) column of row means is subtracted from every column of arr3
arr3 - arr4.reshape(4, 1)

array([[7, 0, 8, 7, 9],
       [3, 6, 2, 0, 3],
       [0, 6, 0, 2, 0],
       [3, 4, 2, 7, 1]])

array([6.2, 2.8, 1.6, 3.4])

array([[6.2],
       [2.8],
       [1.6],
       [3.4]])

array([[ 0.8, -6.2,  1.8,  0.8,  2.8],
       [ 0.2,  3.2, -0.8, -2.8,  0.2],
       [-1.6,  4.4, -1.6,  0.4, -1.6],
       [-0.4,  0.6, -1.4,  3.6, -2.4]])

In [27]:
# arr1: the sequence 0..7 repeated three times, arranged as shape (3, 4, 2).
arr1 = np.array([0, 1, 2, 3, 4, 5, 6, 7] * 3).reshape(3, 4, 2)
# arr2: the sequence 0..7 as shape (4, 2), matching the trailing two dimensions of arr1.
arr2 = np.array([0, 1, 2, 3, 4, 5, 6, 7]).reshape(4, 2)
display(arr1, arr2)
# Broadcasting: arr2 is added to each of the three (4, 2) slices of arr1.
arr3 = arr1 + arr2
arr3

array([[[0, 1],
        [2, 3],
        [4, 5],
        [6, 7]],

       [[0, 1],
        [2, 3],
        [4, 5],
        [6, 7]],

       [[0, 1],
        [2, 3],
        [4, 5],
        [6, 7]]])

array([[0, 1],
       [2, 3],
       [4, 5],
       [6, 7]])

array([[[ 0,  2],
        [ 4,  6],
        [ 8, 10],
        [12, 14]],

       [[ 0,  2],
        [ 4,  6],
        [ 8, 10],
        [12, 14]],

       [[ 0,  2],
        [ 4,  6],
        [ 8, 10],
        [12, 14]]])

## 8 通用函数

In [28]:
np.sin(np.pi / 2)  # sin(pi/2 rad) = 1, i.e. sin(90°)

1.0

In [29]:
# The raw floating-point result is a tiny non-zero number, so it is rounded to 10 decimals.
np.cos(np.pi / 2).round(10)  # cos(pi/2 rad) = 0, i.e. cos(90°)

0.0

In [30]:
np.sqrt(1024, )  # square root

32.0

In [31]:
np.power(8, 1 / 3)  # cube root: raising to the power 1/3, i.e. 8 ** (1/3)

2.0

In [32]:
np.square(np.random.randint(0, 10, size=(2, 2)))  # element-wise square of a random 2x2 array

array([[ 1,  4],
       [ 9, 16]])

In [33]:
8 ** (1 / 3)  # cube root again, using the plain Python power operator

2.0

In [34]:
np.log10(10000)  # base-10 logarithm: 10 ** 4 == 10000

4.0

In [35]:
# Two random 1-D arrays of length 10 with values in [0, 10).
x = np.random.randint(0, 10, size=10)
y = np.random.randint(0, 10, size=10)
display(x, y)
np.maximum(x, y)  # element-wise maximum: each position takes the larger of x[i] and y[i]

array([8, 6, 3, 3, 1, 2, 8, 0, 4, 7])

array([8, 5, 0, 1, 2, 0, 8, 1, 0, 0])

array([8, 6, 3, 3, 2, 2, 8, 1, 4, 7])

In [36]:
arr2 = np.random.randint(0, 10, size=(2, 2))
display(arr2[0], arr2)
# Inner product of the first row with every row of arr2, giving one value per row.
np.inner(arr2[0], arr2)  # vector inner product

array([8, 6])

array([[8, 6],
       [2, 9]])

array([100,  70])

In [37]:
# 20 random integers in [0, 100), used by the clip example in the next cell.
arr = np.random.randint(0, 100, size=20)
arr

array([76, 62, 33,  1, 85, 30, 91,  4, 73, 56,  8, 76, 86, 57,  0, 15, 75,
        4, 96, 42])

In [38]:
np.clip(arr, 10, 20)  # clip: values below 10 become 10, values above 20 become 20

array([20, 20, 20, 10, 20, 20, 20, 10, 20, 20, 10, 20, 20, 20, 10, 15, 20,
       10, 20, 20])

In [39]:
# Odd values 1..9, even values 2..10, and a boolean mask that chooses between them.
arr1 = np.arange(1, 10, 2)
arr2 = np.arange(2, 11, 2)
cond = np.array([1, 0, 0, 1, 1], dtype=bool)
# Element-wise selection: take arr1 where cond is True and arr2 where it is False.
np.where(cond, arr1, arr2)

array([1, 4, 6, 7, 9])

In [40]:
arr3 = np.random.randint(0, 30, size=20)
display(arr3)
np.where(arr3 > 10, arr3, 10)  # keep values greater than 10; every other value (<= 10) becomes 10

array([ 0, 15, 23,  3, 11,  2,  5, 13,  2, 19,  2, 18,  7, 10,  2, 18, 19,
        7, 13, 26])

array([10, 15, 23, 10, 11, 10, 10, 13, 10, 19, 10, 18, 10, 10, 10, 18, 19,
       10, 13, 26])

In [41]:
arr4 = np.random.randint(-10, 180, size=50)
display(arr4)
# Values below 0 or above 150 are replaced by -1; values inside [0, 150] are kept.
np.where((arr4 < 0) | (arr4 > 150), -1, arr4)

array([ -1, 116, 123, 112,  65,  60,  24,  39, 123,  -8,  64,  80, 126,
        24, 104, 108, 177,  41, 144,  53, 169,  99,   3,  99,  54,  73,
        65,  11,   0,  36, 158, 117,  28,  19, 118, 103, 171, 105, 107,
       179, 174,  42, 152, -10, 172,   9, 175,  98,  25,  99])

array([ -1, 116, 123, 112,  65,  60,  24,  39, 123,  -1,  64,  80, 126,
        24, 104, 108,  -1,  41, 144,  53,  -1,  99,   3,  99,  54,  73,
        65,  11,   0,  36,  -1, 117,  28,  19, 118, 103,  -1, 105, 107,
        -1,  -1,  42,  -1,  -1,  -1,   9,  -1,  98,  25,  99])

In [42]:
# Set-style operations on two random 1-D arrays.
arr1 = np.random.randint(0, 10, size=4)
arr2 = np.random.randint(0, 10, size=4)
display(arr1, arr2)
display(np.intersect1d(arr1, arr2))  # intersection: values present in both arrays
display(np.union1d(arr1, arr2))  # union: values present in either array
display(np.setdiff1d(arr1, arr2))  # difference: values in arr1 that are not in arr2

array([1, 6, 7, 5])

array([9, 0, 8, 7])

array([7])

array([0, 1, 5, 6, 7, 8, 9])

array([1, 5, 6])

In [43]:
# Basic reductions on a 1-D array.
arr1 = np.random.randint(0, 100, size=10)
display(arr1)
print('最小值:', arr1.min())  # minimum value
print('取最大值索引:', arr1.argmax())  # index of the maximum value
print('取大于20的数据索引:', np.argwhere(arr1 > 20))  # indices of the elements greater than 20
print('累加和:', np.cumsum(arr1))  # cumulative sum
# Axis-wise statistics on a 4x5 array.
arr2 = np.random.randint(0, 100, size=(4, 5))
display(arr2)
print('列平均:', arr2.mean(axis=0))  # column means (one value per column)
print('行平均:', arr2.mean(axis=1))  # row means (one value per row)
print('协方差:', np.cov(arr2, rowvar=True))  # covariance matrix; rowvar=True treats each row as one variable
print('相关性:', np.corrcoef(arr2, rowvar=True))  # correlation coefficient matrix, rows as variables

array([42, 33, 41, 63, 72, 98,  3, 43, 15, 26])

最小值: 3
取最大值索引: 5
取大于20的数据索引: [[0]
 [1]
 [2]
 [3]
 [4]
 [5]
 [7]
 [9]]
累加和: [ 42  75 116 179 251 349 352 395 410 436]


array([[18, 95, 97,  5, 48],
       [40, 26, 41, 46, 12],
       [27, 30, 86, 18, 91],
       [46, 88, 48, 33, 51]])

列平均: [32.75 59.75 68.   25.5  50.5 ]
行平均: [52.6 33.  50.4 53.2]
协方差: [[1813.3  -176.5   720.2   616.35]
 [-176.5   193.   -252.5  -138.  ]
 [ 720.2  -252.5  1232.3   -40.35]
 [ 616.35 -138.    -40.35  425.7 ]]
相关性: [[ 1.         -0.29835356  0.48179253  0.70152111]
 [-0.29835356  1.         -0.51775524 -0.48144714]
 [ 0.48179253 -0.51775524  1.         -0.05571004]
 [ 0.70152111 -0.48144714 -0.05571004  1.        ]]


# 9 线性代数

In [44]:
# A is 2x3 and B is 3x2, so the matrix product A x B is defined and has shape (2, 2).
A = np.random.randint(0, 100, size=(2, 3))
B = np.random.randint(0, 100, size=(3, 2))
display(A, B)

array([[74, 51, 23],
       [68, 79, 33]])

array([[41, 30],
       [62, 45],
       [47, 19]])

In [45]:
np.dot(A, B)  # matrix multiplication: (2, 3) x (3, 2) -> (2, 2)

array([[7277, 4952],
       [9237, 6222]])

In [46]:
A @ B  # the same matrix product written with the @ operator

array([[7277, 4952],
       [9237, 6222]])

In [47]:
np.set_printoptions(suppress=True)  # print small floats in fixed-point notation instead of scientific notation
# NOTE(review): this import sits mid-notebook, and eig, qr and svd are not used in the
# cells visible here; presumably later cells use them. Confirm before moving or trimming.
from numpy.linalg import inv, det, eig, qr, svd

A = np.array([[1, 2, 3],
              [2, 3, 4],
              [4, 5, 8]])
b = inv(A)  # inverse matrix of A
display(A, b)
A.dot(b)  # matrix product of A and its inverse; the recorded output is the identity matrix

array([[1, 2, 3],
       [2, 3, 4],
       [4, 5, 8]])

array([[-2. ,  0.5,  0.5],
       [ 0. ,  2. , -1. ],
       [ 1. , -1.5,  0.5]])

array([[ 1.,  0., -0.],
       [ 0.,  1.,  0.],
       [ 0., -0.,  1.]])

In [48]:
# The exact determinant of this A is -2; the extra trailing digits in the output are floating-point error.
det(A)  # determinant

-2.0000000000000004

ValueError: could not convert string 'setosa' to float64 at row 0, column 5.