# 1. Backprop

In [28]:
import numpy as np

In [29]:
# Toy problem for the manual backprop walkthrough: a single sample with two
# features, one linear unit (weights w, bias b) and a scalar target y.
X = np.array([[1.0, 2.0]])   # one row, two features -> shape (1, 2)
w = np.array([[0.5],
              [1.0]])        # one weight per feature -> shape (2, 1)
b, y = 0.1, 1.0              # bias and regression target

In [30]:
# Forward pass: linear unit followed by a squared-error loss.
linear_term = X @ w          # (1, 2) @ (2, 1) -> (1, 1)
a = linear_term + b          # prediction of the linear unit
residual = a - y             # signed error against the target
loss = residual ** 2

In [31]:
# Backward pass for a = X @ w + b, loss = (a - y) ** 2, via the chain rule.
dL_da = 2 * (a - y)   # upstream gradient d(loss)/da; same shape as `a`

# Local derivatives of the linear unit with respect to each of its inputs.
da_dw = X.T
da_dX = w.T
da_db = 1

# Combine with matrix products rather than element-wise `*`: broadcasting only
# reproduces the chain rule when X holds a single sample, whereas `@` also
# yields gradients shaped like w and X when X has several rows.
dL_dw = da_dw @ dL_da
# The bias is shared by every row, so its gradient is the column sum of dL_da.
# keepdims keeps the 2-D result the element-wise product produced before.
dL_db = (dL_da * da_db).sum(axis=0, keepdims=True)
dL_dX = dL_da @ da_dX

In [32]:
# Show the three hand-derived gradients, one per line, with a single print call.
print(
    f"dL_dw: {dL_dw}",
    f"dL_dX: {dL_dX}",
    f"dL_db: {dL_db}",
    sep="\n",
)

dL_dw: [[3.2]
 [6.4]]
dL_dX: [[1.6 3.2]]
dL_db: [[3.2]]


# 2. Manual MLP Backprop
Covered in the day18 notebook.

# 3. PyTorch Autograd

In [33]:
import torch

In [34]:
# Same toy problem as the NumPy section, rebuilt as leaf tensors that autograd
# tracks: input X (1, 2), weights w (2, 1) and bias b (1,), created in that order.
X, w, b = (
    torch.tensor(values, requires_grad=True)
    for values in ([[1.0, 2.0]], [[0.5], [1.0]], [0.1])
)

In [35]:
# Forward pass recorded by autograd: linear unit, then squared error against
# the hard-coded target 1.0.
linear_term = X @ w
y_pred = linear_term + b
residual = y_pred - 1.0
loss = residual ** 2

In [36]:
# Run reverse-mode autodiff from the loss; gradients are accumulated into the
# .grad attribute of the leaf tensors the loss was computed from.
torch.autograd.backward(loss)

In [37]:
# loss and y_pred are results of operations on X, w and b (non-leaf tensors);
# the recorded output shows their .grad as None after backward().
# NOTE(review): PyTorch warns on this access; if these intermediate gradients
# are wanted, .retain_grad() would presumably have to be called on them before
# backward() — confirm against the PyTorch docs.
print(f"loss grad: {loss.grad}")
print(f"y_pred grad: {y_pred.grad}")
# w, X and b were created with requires_grad=True, so backward() populated
# their .grad with d(loss)/d(tensor).
print(f"w grad: {w.grad}")
print(f"X grad: {X.grad}")
print(f"b grad: {b.grad}")

loss grad: None
y_pred grad: None
w grad: tensor([[3.2000],
        [6.4000]])
X grad: tensor([[1.6000, 3.2000]])
b grad: tensor([3.2000])


  print(f"loss grad: {loss.grad}")
  print(f"y_pred grad: {y_pred.grad}")


In [38]:
# detach example: cut a tensor out of the autograd graph.
x = torch.tensor([2.0], requires_grad=True)
y = x ** 2  # y is computed from x, so autograd tracks it
z = y.detach()  # Correct: z is a tensor and no longer takes part in gradient tracking

In [39]:
# no_grad example: operations inside the context are not recorded by autograd.
with torch.no_grad():
    doubled = x * 2
    print(doubled)

tensor([4.])


# 4. TensorFlow GradientTape

In [40]:
import tensorflow as tf

In [41]:
# Same toy problem as tf.Variables, created in the order X, w, b:
# input X (1, 2), weights w (2, 1) and bias b (1,).
X, w, b = (
    tf.Variable(initial_value)
    for initial_value in ([[1.0, 2.0]], [[0.5], [1.0]], [0.1])
)

In [42]:
# Record the forward pass on a tape so gradients can be requested afterwards.
with tf.GradientTape() as tape:
    y_pred = X @ w + b           # linear unit
    residual = y_pred - 1.0      # error against the hard-coded target 1.0
    loss = residual ** 2

In [43]:
# Ask the tape for d(loss)/d(source) for each source; the result list follows
# the order of the sources.
sources = [w, X, b]
grads = tape.gradient(loss, sources)

In [44]:
# grads holds one entry per requested source, in the order w, X, b.
grad_w, grad_X, grad_b = grads
print("dw:", grad_w.numpy())  # d(loss)/dw
print("dX:", grad_X.numpy())  # d(loss)/dX
print("db:", grad_b.numpy())  # d(loss)/db

dw: [[3.1999998]
 [6.3999996]]
dX: [[1.5999999 3.1999998]]
db: [3.1999998]
