In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """LeNet-style CNN that maps single-channel images to 10 output scores.

    Architecture: two 5x5 convolutions (1 -> 6 -> 16 channels), each followed
    by ReLU and 2x2 max pooling, then three fully connected layers
    (16*5*5 -> 120 -> 84 -> 10). The first linear layer expects 16*5*5
    features per sample, which is what a 32x32 input yields after the two
    conv/pool stages (32 -> 28 -> 14 -> 10 -> 5).
    """

    def __init__(self):
        super().__init__()
        # Convolutional feature extractor.
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.conv2 = nn.Conv2d(6, 16, 5)

        # Fully connected head; attribute names are kept as-is because they
        # are the keys used in the module's state_dict.
        self.fn1 = nn.Linear(16 * 5 * 5, 120)
        self.fn2 = nn.Linear(120, 84)
        self.fn3 = nn.Linear(84, 10)

    def forward(self, x):
        """Run the network on a batch ``x`` of shape (N, 1, H, W).

        Returns a tensor of shape (N, 10) with the raw (un-normalised)
        outputs of the last linear layer.
        """
        x = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        x = F.max_pool2d(F.relu(self.conv2(x)), 2)
        # Flatten everything except the batch dimension. Unlike
        # ``x.view(-1, n)``, this keeps the batch size explicit, so an empty
        # batch (N == 0) does not fail on an ambiguous ``-1``.
        x = torch.flatten(x, 1)
        x = F.relu(self.fn1(x))
        x = F.relu(self.fn2(x))
        x = self.fn3(x)
        return x

    def num_flat_features(self, x):
        """Return the number of elements per sample in ``x``.

        This is the product of every dimension except the first (batch)
        one; a 1-D tensor therefore yields 1.
        """
        return x.size()[1:].numel()
    
# Seed the global RNG before building the model so that the randomly
# initialised weights (and any random tensors drawn afterwards) are the same
# on every Restart & Run All.
SEED = 0
torch.manual_seed(SEED)

net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fn1): Linear(in_features=400, out_features=120, bias=True)
  (fn2): Linear(in_features=120, out_features=84, bias=True)
  (fn3): Linear(in_features=84, out_features=10, bias=True)
)


In [3]:
# Gather every learnable tensor of the network. Each of the five layers
# contributes a weight and a bias, which is why ten entries are reported.
params = list(net.parameters())
print(len(params))

# The first entry is conv1's weight tensor.
print(params[0].shape)

10
torch.Size([6, 1, 5, 5])


In [5]:
# A single random one-channel 32x32 sample, shaped (batch, channel, H, W).
# NOTE: the name `input` shadows the Python builtin; it is kept because
# later cells refer to it.
input = torch.randn(1, 1, 32, 32)

# Dropping the batch dimension leaves the sizes (1, 32, 32); index 2 picks
# the last of them, so this prints 32.
per_sample_size = input.size()[1:]
print(per_sample_size[2])

# Forward pass through the network.
out = net(input)
print(out)

32
tensor([[ 0.1422, -0.0219, -0.0221, -0.0134, -0.0383,  0.0955,  0.0942,  0.0232,
         -0.0832,  0.0742]], grad_fn=<ThAddmmBackward>)


In [6]:
# Draw a random 10-element target and show it in its original 1-D form.
flat_target = torch.randn(10)
print(flat_target)

# Add a leading batch dimension so the target has shape (1, 10), matching
# the shape of the network output.
target = flat_target.view(1, -1)
print(target)

# Mean squared error between the network output and the target.
criterion = nn.MSELoss()
loss = criterion(out, target)
print(loss)

tensor([-2.0589, -0.6986,  0.3063, -1.8509,  0.2840,  0.7609, -0.3242,  0.7246,
         1.9010,  2.0921])
tensor([[-2.0589, -0.6986,  0.3063, -1.8509,  0.2840,  0.7609, -0.3242,  0.7246,
          1.9010,  2.0921]])
tensor(1.8009, grad_fn=<MseLossBackward>)


In [7]:
# Walk backwards through the autograd graph, starting at the node that
# produced the loss and following the first input edge twice.
node = loss.grad_fn
print(node)
for _ in range(2):
    node = node.next_functions[0][0]
    print(node)

<MseLossBackward object at 0x0000015BB853DDD8>
<ThAddmmBackward object at 0x0000015BB853D7F0>
<ExpandBackward object at 0x0000015BB853DDD8>


In [8]:
# Show the gradient of conv1's bias immediately before and after
# back-propagating the loss.
conv1_bias = net.conv1.bias

# Clear any previously accumulated gradients first.
net.zero_grad()
print("conv1.bias.grad before backward")
print(conv1_bias.grad)

# Back-propagate; this populates .grad on the network parameters.
loss.backward()

print("conv1.bias.grad after backward")
print(conv1_bias.grad)

conv1.bias.grad before backward
None
conv1.bias.grad after backward
tensor([ 0.0164, -0.0071,  0.0046,  0.0252,  0.0116, -0.0205])


In [9]:
learning_rate = 0.01

# One manual SGD step using the gradients from the preceding backward pass:
#     param <- param - learning_rate * grad
# The in-place update runs under no_grad() instead of going through `.data`,
# so it is not recorded in the graph but autograd still sees that the
# parameter was modified.
with torch.no_grad():
    for param in net.parameters():
        if param.grad is None:
            # Parameter received no gradient; leave it unchanged.
            continue
        param.sub_(param.grad * learning_rate)

# Recompute the loss with the updated parameters.
out = net(input)
print(criterion(out, target))

tensor(1.7319, grad_fn=<MseLossBackward>)


In [11]:
# Same update rule as a manual SGD step, but delegated to torch.optim.
optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

for i in range(10):
    # Gradients accumulate across backward() calls, so clear them first.
    optimizer.zero_grad()

    output = net(input)
    loss = criterion(output, target)

    loss.backward()
    optimizer.step()

    # .item() extracts the scalar loss as a plain Python float for formatting.
    print('Step:', i, '| train loss:%.4f' % loss.item())

Step: 0 | train loss:1.7319
Step: 1 | train loss:1.6744
Step: 2 | train loss:1.6289
Step: 3 | train loss:1.5844
Step: 4 | train loss:1.5375
Step: 5 | train loss:1.4903
Step: 6 | train loss:1.4396
Step: 7 | train loss:1.3844
Step: 8 | train loss:1.3272
Step: 9 | train loss:1.2639
