<a href="https://colab.research.google.com/github/kuuuun/python_jupyter_notes/blob/main/chapter04_broadcasting.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# NumPy 权威指南，第四章 广播机制

In [1]:
import numpy as np

## normal broadcasting

In [2]:
# Broadcasting is decided by array *shapes*, so build an image whose shape is
# (256, 256, 3) -- height x width x RGB -- instead of a 1-D array that merely
# holds the numbers 256, 256 and 3.
image = np.zeros((256, 256, 3), dtype=np.uint8)
# One offset per colour channel: shape (3,), which lines up with the image's
# trailing axis.
color = np.array([10, 20, 30], dtype=np.uint8)

# (256, 256, 3) + (3,) -> `color` is stretched across every pixel, giving a
# result with the image's shape.
result = image + color
result.shape

(256, 256, 3)

In [7]:
# A 1-D array of the integers 0..9; its shape is the one-element tuple (10,).
a = np.arange(0, 10)
# Show the values and then the shape, one per line.
print(a, a.shape, sep="\n")

[0 1 2 3 4 5 6 7 8 9]
(10,)


In [6]:
import numpy as np

# Two identical 1-D vectors holding 0..9, each of shape (10,).
a = np.arange(10)
b = a.copy()

# Multiplying them directly (a * b) is element-wise and yields
# [0, 1, 4, ..., 81] -- a vector of squares, not the 2-D table we want.

# === Reshape with newaxis ===
# Step 1: append a length-1 axis so `a` becomes a column vector of shape (10, 1).
col_vec = a[..., np.newaxis]

# Step 2: leave `b` untouched; during broadcasting its (10,) shape is
# treated as (1, 10), i.e. a row vector.
row_vec = b

print(f"列向量形状: {col_vec.shape}", f"行向量形状: {row_vec.shape}", sep="\n")

# Step 3: broadcast multiply.
# (10, 1) * (1, 10) -> both operands are stretched to (10, 10).
mult_table = np.multiply(col_vec, row_vec)

# Show the top-left 5x5 corner first, then the complete table.
print("\n九九乘法表 (部分):", mult_table[:5, :5], sep="\n")
print("\n九九乘法表 (all):", mult_table, sep="\n")

(10,)
列向量形状: (10, 1)
行向量形状: (10,)

九九乘法表 (部分):
[[ 0  0  0  0  0]
 [ 0  1  2  3  4]
 [ 0  2  4  6  8]
 [ 0  3  6  9 12]
 [ 0  4  8 12 16]]

九九乘法表 (all):
[[ 0  0  0  0  0  0  0  0  0  0]
 [ 0  1  2  3  4  5  6  7  8  9]
 [ 0  2  4  6  8 10 12 14 16 18]
 [ 0  3  6  9 12 15 18 21 24 27]
 [ 0  4  8 12 16 20 24 28 32 36]
 [ 0  5 10 15 20 25 30 35 40 45]
 [ 0  6 12 18 24 30 36 42 48 54]
 [ 0  7 14 21 28 35 42 49 56 63]
 [ 0  8 16 24 32 40 48 56 64 72]
 [ 0  9 18 27 36 45 54 63 72 81]]


## 按轴标准化

In [8]:
import numpy as np

# Step 1: synthetic data -- 5 cities x 365 days of integer temperatures.
# `integers(0, 30)` draws from 0..29 (the upper bound is exclusive).
rng = np.random.default_rng(42)
temps = rng.integers(low=0, high=30, size=(5, 365))

print(f"原始数据形状: {temps.shape}")

# Step 2: per-city statistics, reduced along axis=1 (the "day" axis).
# keepdims=True is the important part: without it the result has shape (5,),
# with it the result has shape (5, 1), which broadcasts cleanly against the
# (5, 365) data.
city_means = np.mean(temps, axis=1, keepdims=True)
city_stds = np.std(temps, axis=1, keepdims=True)

print(f"均值数组形状 (keepdims=True): {city_means.shape}")

# Step 3: broadcast arithmetic.
# (5, 365) - (5, 1) and then (5, 365) / (5, 1) both broadcast along axis 1.
normalized_temps = np.divide(temps - city_means, city_stds)

print("\n标准化后前 5 天的数据 (前两个城市):")
print(normalized_temps[:2, :5].round(2))

# Step 4: sanity check -- after standardising, every city's mean should be
# close to 0 and its standard deviation close to 1.
print("\n验证标准化结果:")
print("新均值:", normalized_temps.mean(axis=1).round(2))
print("新标准差:", normalized_temps.std(axis=1).round(2))

原始数据形状: (5, 365)
均值数组形状 (keepdims=True): (5, 1)

标准化后前 5 天的数据 (前两个城市):
[[-1.5   1.01  0.53 -0.18 -0.3 ]
 [ 0.03 -0.08 -0.65 -1.67  0.94]]

验证标准化结果:
新均值: [-0. -0.  0. -0. -0.]
新标准差: [1. 1. 1. 1. 1.]


## 计算5个点中，任意2个点的距离

In [16]:
# Draw the (x, y) coordinates of 5 points, uniformly from [0, 1).
# A cell-local seeded Generator is used so the cell produces the same points
# on every Restart & Run All, independent of any other cell's RNG state.
coords_rng = np.random.default_rng(0)
coords = coords_rng.random((5, 2))

# coords has shape (5, 2).
# To subtract every point from every other point, build two views:
# A: shape (5, 1, 2) -> plays the role of point i
# B: shape (1, 5, 2) -> plays the role of point j

A = coords[:, np.newaxis, :]
B = coords[np.newaxis, :, :]
print(A)
print(A.shape)
# (5, 1, 2)
# layout: [[x, y]], [[..]], [[..]], [[..]], [[..]]
print(B)
print(B.shape)
# (1, 5, 2)
# layout: [[[x, y], [..], [..], [..], [..]]]

# Broadcast subtraction:
# (5, 1, 2) - (1, 5, 2) -> result has shape (5, 5, 2)
# Meaning: diff[i, j] = [x_i - x_j, y_i - y_j]
diff = A - B
print(diff)
print(diff.shape)


# Square, sum and take the square root.
# Summing over the last axis (axis=2) adds dx^2 + dy^2 for each (i, j) pair,
# i.e. it collapses the x/y components.
dist_matrix = np.sqrt(np.sum(diff**2, axis=2))

print("距离矩阵形状:", dist_matrix.shape)
print("点0 到其他点的距离:", np.round(dist_matrix[0], 2))

[[[0.85475298 0.41193868]]

 [[0.55032437 0.95315609]]

 [[0.45028937 0.88723122]]

 [[0.20774349 0.32062494]]

 [[0.68205607 0.08845953]]]
(5, 1, 2)
[[[0.85475298 0.41193868]
  [0.55032437 0.95315609]
  [0.45028937 0.88723122]
  [0.20774349 0.32062494]
  [0.68205607 0.08845953]]]
(1, 5, 2)
[[[ 0.          0.        ]
  [ 0.30442861 -0.54121741]
  [ 0.40446361 -0.47529254]
  [ 0.64700949  0.09131373]
  [ 0.1726969   0.32347914]]

 [[-0.30442861  0.54121741]
  [ 0.          0.        ]
  [ 0.100035    0.06592487]
  [ 0.34258088  0.63253114]
  [-0.13173171  0.86469656]]

 [[-0.40446361  0.47529254]
  [-0.100035   -0.06592487]
  [ 0.          0.        ]
  [ 0.24254588  0.56660627]
  [-0.2317667   0.79877169]]

 [[-0.64700949 -0.09131373]
  [-0.34258088 -0.63253114]
  [-0.24254588 -0.56660627]
  [ 0.          0.        ]
  [-0.47431259  0.23216541]]

 [[-0.1726969  -0.32347914]
  [ 0.13173171 -0.86469656]
  [ 0.2317667  -0.79877169]
  [ 0.47431259 -0.23216541]
  [ 0.          0.        ]]

In [17]:
# Two sample points given as (x, y) pairs.
pt1, pt2 = np.array([10, 10]), np.array([5, 20])
print(pt1, pt2)

# Component-wise offset between the points: [dx, dy].
diff = np.subtract(pt1, pt2)
print(diff)
# Euclidean distance: sqrt(dx^2 + dy^2), summing over the array's only axis.
distance = np.sqrt(np.square(diff).sum(axis=0))
print(distance)

[10 10] [ 5 20]
[  5 -10]
11.180339887498949
