# 矩陣求導：$y = \lVert XW + b \rVert_F^2$

In [1]:
import torch
# Reproducible autograd demo for the squared Frobenius norm y = ||XW + b||_F^2.
torch.manual_seed(0)

# The leaves are drawn in the order x, w, b so the seeded values do not change.
x = torch.randn(10, 4, requires_grad=True)
w = torch.randn(4, 4, requires_grad=True)
b = torch.randn(10, 4, requires_grad=True)
print(x, w, b, sep="\n")

# Sum of the squared entries of (XW + b): a 0-dim tensor that backward() can start from.
affine = torch.mm(x, w) + b
y = torch.sum(torch.pow(affine, 2))
print(y)

tensor([[-1.1258, -1.1524, -0.2506, -0.4339],
        [ 0.8487,  0.6920, -0.3160, -2.1152],
        [ 0.3223, -1.2633,  0.3500,  0.3081],
        [ 0.1198,  1.2377,  1.1168, -0.2473],
        [-1.3527, -1.6959,  0.5667,  0.7935],
        [ 0.5988, -1.5551, -0.3414,  1.8530],
        [-0.2159, -0.7425,  0.5627,  0.2596],
        [-0.1740, -0.6787,  0.9383,  0.4889],
        [ 1.2032,  0.0845, -1.2001, -0.0048],
        [-0.5181, -0.3067, -1.5810,  1.7066]], requires_grad=True)
tensor([[ 0.2055, -0.4503, -0.5731, -0.5554],
        [ 0.5943,  1.5419,  0.5073, -0.5910],
        [-1.3253,  0.1886, -0.0691, -0.4949],
        [-1.4959, -0.1938,  0.4455,  1.3253]], requires_grad=True)
tensor([[ 1.5091,  2.0820,  1.7067,  2.3804],
        [-1.1256, -0.3170, -1.0925, -0.0852],
        [ 0.3276, -0.7607, -1.5991,  0.0185],
        [-0.7504,  0.1854,  0.6211,  0.6382],
        [-0.0033, -0.5344,  1.1687,  0.3945],
        [ 1.9415,  0.7915, -0.0203, -0.4372],
        [-0.2188, -2.4351, -0.0729, -0

In [2]:
# Back-propagate from the scalar y = ||XW + b||_F^2 to the three leaf tensors.
# Analytically: dy/dX = 2 (XW + b) W^T, dy/dW = 2 X^T (XW + b), dy/db = 2 (XW + b).
y.backward()
for leaf in (x, w, b):
    print(leaf.grad)

tensor([[ -5.5354,   2.2788,  -7.2747,   4.9560],
        [  7.0786,   7.9225,  -3.9229, -20.8719],
        [  3.7750, -13.6215,   1.8300,   5.0560],
        [ -2.5051,   8.0172,   4.6761,   0.5287],
        [ -3.8409, -13.8474,   4.4959,  19.6415],
        [ -1.1472, -12.0802,  -0.4455,  11.5021],
        [  1.9607, -13.7048,   3.0314,   8.1723],
        [  0.3539,  -5.1480,   3.1660,   5.3047],
        [  0.5524,   0.1245,  -3.4478,  -3.5855],
        [ -8.1827,  -3.6565,  -3.3562,  16.4601]])
tensor([[ 12.8560,   0.9793, -10.9075, -24.5453],
        [ 21.0108,  33.3000,  -1.3519, -40.4582],
        [-14.7987,  -2.2609,  -8.6005, -12.0511],
        [-28.4880, -20.5779,  12.3552,  46.0581]])
tensor([[ 3.1480,  1.6979,  3.1826,  6.4714],
        [ 6.0862,  1.4365, -4.2966, -7.2248],
        [-2.5635, -5.6951, -4.6232,  1.6427],
        [-2.2008,  4.5967,  1.9861, -2.0806],
        [-6.4545, -5.1746,  2.7957,  5.8385],
        [-2.3584, -4.5992, -0.6065,  5.5481],
        [-3.6774, -6.8

# Practice

In [3]:
# Scalar sanity check: y = (w*x + b)^2 with w = 2, x = 1, b = 3, so w*x + b = 5.
x = torch.tensor([1.0], requires_grad=True)
w = torch.tensor([2.0], requires_grad=True)
b = torch.tensor([3.0], requires_grad=True)

y = (w * x + b) ** 2
y.backward()  # y has a single element, so no explicit gradient argument is needed

# Expected: dy/dw = 2(wx+b)x = 10, dy/dx = 2(wx+b)w = 20, dy/db = 2(wx+b) = 10.
print(w, x, b, sep="\n")
print(w.grad, x.grad, b.grad, sep="\n")

tensor([2.], requires_grad=True)
tensor([1.], requires_grad=True)
tensor([3.], requires_grad=True)
tensor([10.])
tensor([20.])
tensor([10.])


In [4]:
# backward() ADDS into the existing .grad buffers instead of overwriting them.
# The gradients left over from the (w*x + b)^2 example were not cleared, so the
# values printed here are those old gradients plus the gradients of y = w*x + b
# (dy/dw = x, dy/dx = w, dy/db = 1).
y = x * w + b
y.backward()
print(w.grad, x.grad, b.grad, sep="\n")


tensor([11.])
tensor([22.])
tensor([11.])


In [5]:
# Reset the accumulated gradients in place so this backward pass starts from zero;
# the printed values are then the gradients of y = w*x + b alone.
for leaf in (w, x, b):
    leaf.grad.zero_()

y = x * w + b
y.backward()
print(w.grad, x.grad, b.grad, sep="\n")

tensor([1.])
tensor([2.])
tensor([1.])


# 目標函數 $f = \lVert Y - Y_{pred} \rVert_F^2$

# $h = XW_{1} + b_{1}$ ; $h_{sigmoid} = \mathrm{sigmoid}(h)$ ; $Y_{pred} = h_{sigmoid}W_{2} + b_{2}$

# 表示方式 $h_{sigmoid} = h_s$ ; $\mathrm{sigmoid}(h) = S(h)$ ; $Y_{pred} = Y_{p}$

# 另外來看 $f = \lVert Y - (S(XW_{1} + b_{1}) W_{2} + b_{2}) \rVert_F^2$

# 給定實驗資料

In [6]:
import torch
import numpy as np
# Seeded random data and parameters for the two-layer example below.
torch.manual_seed(0)

# The draw order (x, y, w1, w2, b1, b2) is kept fixed so the seeded values are reproducible.
x = torch.randn(100, 1, requires_grad=True)
y = torch.randn(100, 1, requires_grad=True)    # target; requires_grad so y.grad can be inspected
w1 = torch.randn(1, 10, requires_grad=True)
w2 = torch.randn(10, 1, requires_grad=True)
b1 = torch.randn(100, 10, requires_grad=True)  # full (100, 10) matrix, not a broadcast row vector
b2 = torch.randn(100, 1, requires_grad=True)   # full (100, 1) matrix, not a broadcast scalar

# Report shapes through the tensor's own .shape attribute (a torch.Size) rather
# than np.shape(); the printed text is identical.
for label, tensor in (
    ("x : ", x),
    ("y : ", y),
    ("w1 : ", w1),
    ("w2 : ", w2),
    ("b1 : ", b1),
    ("b2 : ", b2),
):
    print(label, tensor.shape)

x :  torch.Size([100, 1])
y :  torch.Size([100, 1])
w1 :  torch.Size([1, 10])
w2 :  torch.Size([10, 1])
b1 :  torch.Size([100, 10])
b2 :  torch.Size([100, 1])


In [7]:
import torch.nn as nn
# Forward pass and squared-error loss f1 = sum((y - yp)^2).
tm = nn.Sigmoid()
s = tm(torch.mm(x, w1))  # sigmoid applied to x @ w1 only
# NOTE(review): b1 is added AFTER the sigmoid here, i.e. yp = (S(x w1) + b1) w2 + b2,
# whereas the markdown formula in this notebook states Y_pred = S(X W1 + b1) W2 + b2.
# Confirm which form is intended before relying on the gradients below.
yp = torch.mm(s + b1, w2) + b2
f1 = torch.sum(torch.pow(y - yp, 2))  # scalar loss used for backward()
ft = torch.pow(y - yp, 2)             # element-wise squared error, left un-reduced
print(f1)

tensor(1010.4425, grad_fn=<SumBackward0>)


In [8]:
# Inspect .grad on every leaf before backward() has been called on the loss:
# nothing has been accumulated yet, so each one prints None.
for label, leaf in (
    ("x : ", x),
    ("y : ", y),
    ("w1 : ", w1),
    ("w2 : ", w2),
    ("b1 : ", b1),
    ("b2 : ", b2),
):
    print(label, leaf.grad)

x :  None
y :  None
w1 :  None
w2 :  None
b1 :  None
b2 :  None


In [9]:
# Back-propagate the scalar loss f1; this fills .grad on the leaf tensors it
# was built from (x, y, w1, w2, b1, b2).
f1.backward()

#  直接求導

In [10]:
# Gradient of f1 with respect to x, as computed by autograd.
# With G = -2 (y - yp) and s = sigmoid(x w1):
#   x.grad = ((G w2^T) * s * (1 - s)) w1^T      (* is element-wise)
print( "x : ", x.grad)

x :  tensor([[-4.9110e-01],
        [ 4.3571e-01],
        [-3.9135e+00],
        [ 1.5481e+00],
        [-1.2776e-01],
        [-2.6933e+00],
        [ 5.1845e+00],
        [ 6.6954e-01],
        [ 1.2904e-01],
        [ 3.7789e-01],
        [ 3.7368e+00],
        [ 4.5151e+00],
        [-1.4528e+00],
        [ 1.4875e+00],
        [-7.0937e-01],
        [ 8.0065e+00],
        [ 1.2312e-01],
        [ 1.4152e-01],
        [ 6.9327e+00],
        [-1.0470e+00],
        [ 5.4671e+00],
        [ 6.1589e-03],
        [-7.6717e+00],
        [ 1.0838e-01],
        [ 3.7338e+00],
        [-6.0198e+00],
        [ 1.4813e+01],
        [-2.0631e+00],
        [-5.4940e-02],
        [-9.7939e-02],
        [-1.6882e+00],
        [-2.8643e+00],
        [ 1.0602e+00],
        [ 4.3974e+00],
        [-1.9600e+00],
        [-2.8201e+00],
        [ 5.3914e+00],
        [-1.5242e+00],
        [-2.8165e+00],
        [ 5.5835e+00],
        [-8.6938e+00],
        [ 1.3722e+00],
        [ 1.1579e-01],
      

In [11]:
# Gradient of f1 = sum((y - yp)^2) with respect to the target y:
#   y.grad = 2 (y - yp), i.e. the negative of b2.grad.
print( "y : ", y.grad)

y :  tensor([[-3.1167e+00],
        [ 3.0445e+00],
        [-5.1600e+00],
        [ 2.3788e+00],
        [-3.7886e-01],
        [-5.9036e+00],
        [ 7.1526e+00],
        [-6.7351e+00],
        [ 1.7889e-01],
        [ 4.2877e+00],
        [ 5.2998e+00],
        [ 6.1919e+00],
        [-1.8052e+00],
        [ 1.4865e+01],
        [-4.3620e+00],
        [ 1.0535e+01],
        [ 2.4517e+00],
        [-2.8723e+00],
        [ 1.2525e+01],
        [-2.7681e+00],
        [ 1.0340e+01],
        [-3.7313e-01],
        [-1.0802e+01],
        [-1.4425e+00],
        [ 9.0801e+00],
        [-1.1168e+01],
        [ 1.8763e+01],
        [-2.6249e+00],
        [-1.5175e+00],
        [ 3.9725e+00],
        [-6.2894e+00],
        [-8.4017e+00],
        [ 2.0495e+00],
        [ 5.3750e+00],
        [-3.2184e+00],
        [-3.7134e+00],
        [ 8.3361e+00],
        [-1.8899e+00],
        [-5.6769e+00],
        [ 8.6466e+00],
        [-1.0751e+01],
        [ 3.6202e+00],
        [ 1.5667e-01],
      

In [12]:
# Gradient of f1 with respect to w1.
# With G = -2 (y - yp) and s = sigmoid(x w1):
#   w1.grad = x^T ((G w2^T) * s * (1 - s))      (* is element-wise)
print( "w1 : ", w1.grad)

w1 :  tensor([[ 4.3456, -3.1038, -1.3355, -0.7209,  1.2226,  4.0725, 14.6505, -4.9000,
          0.8338, -3.4824]])


In [13]:
# Gradient of f1 with respect to w2.
# With G = -2 (y - yp) and yp = (s + b1) w2 + b2:
#   w2.grad = (s + b1)^T G
print( "w2 : ", w2.grad)

w2 :  tensor([[-145.5252],
        [ 311.3998],
        [ 101.6342],
        [ -14.9336],
        [-155.0851],
        [-142.8668],
        [-506.1857],
        [ 228.3139],
        [ -63.1988],
        [ -11.7027]])


In [14]:
# Gradient of f1 with respect to b1.
# b1 enters yp = (s + b1) w2 + b2 linearly, so with G = -2 (y - yp):
#   b1.grad = G w2^T   (each row is that sample's G entry times w2^T)
print( "b1 : ", b1.grad)

b1 :  tensor([[-1.5480e+00,  4.0829e+00,  1.2328e+00,  2.7888e-01, -5.2426e-01,
         -1.4734e+00, -6.1417e+00,  1.9575e+00, -3.5266e-01,  1.4434e+00],
        [ 1.5121e+00, -3.9883e+00, -1.2043e+00, -2.7242e-01,  5.1211e-01,
          1.4393e+00,  5.9994e+00, -1.9122e+00,  3.4449e-01, -1.4099e+00],
        [-2.5628e+00,  6.7597e+00,  2.0411e+00,  4.6171e-01, -8.6797e-01,
         -2.4394e+00, -1.0168e+01,  3.2409e+00, -5.8386e-01,  2.3896e+00],
        [ 1.1815e+00, -3.1163e+00, -9.4096e-01, -2.1286e-01,  4.0014e-01,
          1.1246e+00,  4.6877e+00, -1.4941e+00,  2.6917e-01, -1.1017e+00],
        [-1.8817e-01,  4.9632e-01,  1.4986e-01,  3.3900e-02, -6.3728e-02,
         -1.7911e-01, -7.4657e-01,  2.3796e-01, -4.2869e-02,  1.7545e-01],
        [-2.9321e+00,  7.7339e+00,  2.3352e+00,  5.2825e-01, -9.9305e-01,
         -2.7910e+00, -1.1633e+01,  3.7079e+00, -6.6800e-01,  2.7340e+00],
        [ 3.5525e+00, -9.3702e+00, -2.8293e+00, -6.4001e-01,  1.2032e+00,
          3.3815e+00,  1.4

In [15]:
# Gradient of f1 = sum((y - yp)^2) with respect to b2, which is added directly to yp:
#   b2.grad = -2 (y - yp), i.e. the negative of y.grad.
print( "b2 : ", b2.grad)

b2 :  tensor([[ 3.1167e+00],
        [-3.0445e+00],
        [ 5.1600e+00],
        [-2.3788e+00],
        [ 3.7886e-01],
        [ 5.9036e+00],
        [-7.1526e+00],
        [ 6.7351e+00],
        [-1.7889e-01],
        [-4.2877e+00],
        [-5.2998e+00],
        [-6.1919e+00],
        [ 1.8052e+00],
        [-1.4865e+01],
        [ 4.3620e+00],
        [-1.0535e+01],
        [-2.4517e+00],
        [ 2.8723e+00],
        [-1.2525e+01],
        [ 2.7681e+00],
        [-1.0340e+01],
        [ 3.7313e-01],
        [ 1.0802e+01],
        [ 1.4425e+00],
        [-9.0801e+00],
        [ 1.1168e+01],
        [-1.8763e+01],
        [ 2.6249e+00],
        [ 1.5175e+00],
        [-3.9725e+00],
        [ 6.2894e+00],
        [ 8.4017e+00],
        [-2.0495e+00],
        [-5.3750e+00],
        [ 3.2184e+00],
        [ 3.7134e+00],
        [-8.3361e+00],
        [ 1.8899e+00],
        [ 5.6769e+00],
        [-8.6466e+00],
        [ 1.0751e+01],
        [-3.6202e+00],
        [-1.5667e-01],
     

In [16]:
# Kan Horst 