In [1]:
import torch
# Show which PyTorch build is installed in this kernel.
print(torch.__version__)

2.1.0+cpu


In [2]:
# Sample a 4x1 column vector; no requires_grad flag is passed here.
x = torch.randn(4,1)

In [3]:
# Inspect the sampled values.
print(x)

tensor([[ 1.7410],
        [ 1.1910],
        [-0.2778],
        [ 1.6618]])


In [4]:
# x was created without a requires_grad argument, so this reports the default setting.
print(x.requires_grad)

False


### AutoGrad

In [5]:
# Two 4x1 leaf tensors tracked by autograd, plus a 4x4 matrix that is not tracked.
x = torch.randn((4, 1), requires_grad=True)
y = torch.randn((4, 1), requires_grad=True)
W = torch.randn((4, 4))  # no requires_grad flag: treated as a constant
# One call, newline-separated: same stdout as printing each tensor in turn.
print(x, y, W, sep="\n")

tensor([[-0.5466],
        [ 0.6775],
        [ 0.1534],
        [-1.6071]], requires_grad=True)
tensor([[-0.0073],
        [-0.3894],
        [-1.1882],
        [-1.2618]], requires_grad=True)
tensor([[-0.6017, -0.1036,  1.2151,  0.5697],
        [-0.1537,  1.2196, -0.9392,  0.3663],
        [ 0.0351, -0.1864,  1.4304,  0.7589],
        [-0.8146, -0.6637, -0.6465, -0.6400]])


## $z = x^T W y$

In [6]:
# z = x^T W y via the functional API: (1x4)(4x4)(4x1) -> a 1x1 tensor.
z = torch.mm(torch.mm(torch.t(x), W),y)
print(z)

tensor([[-2.0742]], grad_fn=<MmBackward0>)


In [7]:
# Same product written with chained tensor methods; this rebinds z to a fresh graph.
z = x.t().mm(W).mm(y)
print(z)

tensor([[-2.0742]], grad_fn=<MmBackward0>)


In [8]:
# backward() has not been called yet, so no gradient is stored on x.
print(x.grad)

None


In [9]:
# Likewise for y: no gradient is stored before backward().
print(y.grad)

None


In [10]:
# Backpropagate from the 1x1 result z; this populates .grad on the tracked leaves x and y.
z.backward()

In [11]:
# dz/dx for z = x^T W y; analytically this is W y.
print(x.grad)

tensor([[-2.1179],
        [ 0.1800],
        [-2.5849],
        [ 1.8401]])


In [12]:
# Analytic check: dz/dx = W y, which should equal the values stored in x.grad.
print(W.mm(y))

tensor([[-2.1179],
        [ 0.1800],
        [-2.5849],
        [ 1.8401]], grad_fn=<MmBackward0>)


In [13]:
# dz/dy for z = x^T W y; analytically this is W^T x.
print(y.grad)

tensor([[ 1.5392],
        [ 1.9209],
        [-0.0421],
        [ 1.0818]])


In [14]:
# Analytic check: dz/dy = W^T x, which should equal the values stored in y.grad.
print(W.t().mm(x))

tensor([[ 1.5392],
        [ 1.9209],
        [-0.0421],
        [ 1.0818]], grad_fn=<MmBackward0>)


In [None]:
# Reset x.grad to zeros in place (trailing underscore = in-place op).
# backward() adds into .grad rather than overwriting it, so stale values would otherwise carry over.
x.grad.zero_()

In [None]:
# Same in-place reset for y.grad.
y.grad.zero_()

In [None]:
# Confirm the reset: x.grad should now be all zeros.
print(x.grad)

### 默认情况下，定义的tensor属性requires_grad为false

In [None]:
# Fresh tracked 4x1 vector; y = x^T x is its squared norm as a 1x1 tensor.
x = torch.randn((4, 1), requires_grad=True)
print(x)
y = x.t().mm(x)
print(y)
# y holds a single element, so backward() needs no explicit gradient argument.
y.backward()

In [None]:
# For y = x^T x the gradient dy/dx is 2x.
print(x.grad)

In [None]:
# Show y, the autograd gradient, and the analytic gradient 2x together;
# for y = x^T x the last two should agree.
print(y, x.grad, 2 * x, sep="\n")

# Test

In [22]:
import torch

# Fixed seed so the random draws below are reproducible.
torch.manual_seed(0)

# Draw order (x, then W, then y) determines which values each tensor receives.
x = torch.randn((10, 4), requires_grad=True)
W = torch.randn((4, 4), requires_grad=True)
y = torch.randn((10, 4))  # target values; not tracked by autograd

# Same stdout as three separate prints, in the order x, y, W.
print(x, y, W, sep="\n")

tensor([[-1.1258, -1.1524, -0.2506, -0.4339],
        [ 0.8487,  0.6920, -0.3160, -2.1152],
        [ 0.3223, -1.2633,  0.3500,  0.3081],
        [ 0.1198,  1.2377,  1.1168, -0.2473],
        [-1.3527, -1.6959,  0.5667,  0.7935],
        [ 0.5988, -1.5551, -0.3414,  1.8530],
        [-0.2159, -0.7425,  0.5627,  0.2596],
        [-0.1740, -0.6787,  0.9383,  0.4889],
        [ 1.2032,  0.0845, -1.2001, -0.0048],
        [-0.5181, -0.3067, -1.5810,  1.7066]], requires_grad=True)
tensor([[ 1.5091,  2.0820,  1.7067,  2.3804],
        [-1.1256, -0.3170, -1.0925, -0.0852],
        [ 0.3276, -0.7607, -1.5991,  0.0185],
        [-0.7504,  0.1854,  0.6211,  0.6382],
        [-0.0033, -0.5344,  1.1687,  0.3945],
        [ 1.9415,  0.7915, -0.0203, -0.4372],
        [-0.2188, -2.4351, -0.0729, -0.0340],
        [ 0.9625,  0.3492, -0.9215, -0.0562],
        [-0.6227, -0.4637,  1.9218, -0.4025],
        [ 0.1239,  1.1648,  0.9234,  1.3873]])
tensor([[ 0.2055, -0.4503, -0.5731, -0.5554],
        [ 0.

## Loss Function $f = \lVert \max(XW, 0) - Y \rVert_F^2$

## $f = \lVert \hat{Y} - Y \rVert_F^2$; $\hat{Y} = \max(Z, 0)$; $Z = XW$

In [23]:
# Squared Frobenius-norm loss: clamp XW below at 0 (ReLU), subtract the target, square, sum.
f = (torch.mm(x, W).clamp(min=0) - y).pow(2).sum()
print(f)

tensor(99.9048, grad_fn=<SumBackward0>)


In [None]:
# Pre-activation Z = XW for the step-by-step version of the loss (rebinds z).
z = x.mm(W)

In [None]:
# y_hat was never defined anywhere in the notebook, so this cell raised NameError.
# Build it from the pre-activation z (= XW) by clamping below at 0, i.e. Y_hat = max(Z, 0),
# then take the squared Frobenius norm of the residual against the target y.
y_hat = torch.clamp(z, min=0)
f = (y_hat - y).pow(2).sum()

In [None]:
# Display the loss value held in f.
print(f)

In [24]:
# backward() has not run on f yet, so x.grad is still None here
# (W.grad is presumably None as well, so there is nothing to zero at this point).
print(x.grad)


None


In [25]:
# Backpropagate the scalar loss; x and W were both created with requires_grad=True,
# so both receive a .grad.
f.backward()

In [26]:
# Gradient of the loss f with respect to x (same 10x4 shape as x).
print(x.grad)

tensor([[  1.1002,   0.0860,   5.3377,   0.2788],
        [  0.9583,  10.4633, -13.5234, -16.3639],
        [ -0.8712,  -0.9272,  -0.7764,   2.0790],
        [ -1.4504,   5.6914,   0.7613,  -0.9693],
        [ -1.2892,  -3.4714,  -1.9788,   4.8091],
        [ -4.0523,  -4.3127,  -3.6114,   9.6703],
        [ -0.7312,  -0.7782,  -0.6516,   1.7449],
        [ -0.8191,  -0.8718,  -0.7300,   1.9547],
        [  1.0350,   2.9930,  -6.6743,  -7.5333],
        [ -2.4616,  -2.4243,  -2.1164,   5.7128]])
