## **关于CLASS**
- 子类、父类、继承
### *PyTorch输入的含义 [Batch, Channels, Height, Width]*
关于super的语法：
https://blog.csdn.net/weixin_43402775/article/details/106866960

In [2]:
class Person:
    """A person described by a name, a gender and an age."""

    def __init__(self, name, gender, age):
        """Store the three constructor arguments as same-named attributes."""
        self.name, self.gender, self.age = name, gender, age
 
class Student(Person):
    """A ``Person`` that additionally records a school and a score.

    ``name`` and ``gender`` end up stored upper-cased: the values assigned by
    the parent constructor are overwritten afterwards.
    """

    def __init__(self, name, gender, age, school, score):
        """Initialise the inherited fields via ``Person``, then the student-only ones."""
        # Zero-argument super() resolves to the same parent call as
        # super(Student, self) in Python 3.
        super().__init__(name, gender, age)
        # Overwrite the parent's plain values with upper-cased copies.
        self.name = name.upper()
        self.gender = gender.upper()
        # Attributes that only a Student carries.
        self.school = school
        self.score = score
 
# Build a student (arguments spelled out by keyword) and show two attributes,
# one per line.
s = Student(name="steven", gender="male", age=18, school="Middle school", score=87)
print(s.school, s.name, sep="\n")

Middle school
STEVEN


In [39]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# Network definition
class Net(nn.Module):
    """Small CNN: two 3x3 convolutions followed by three fully connected layers.

    ``fc1`` expects 64 input features, which is what the two conv/pool stages
    produce for a 1x16x16 input (16 channels x 2 x 2).
    """

    def __init__(self):
        """Create the two convolution layers and the three linear layers."""
        super().__init__()
        # Conv layer 1: 1 input channel -> 6 output channels, 3x3 kernel.
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=3)
        # Conv layer 2: 6 input channels -> 16 output channels, 3x3 kernel.
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=3)
        # Three fully connected layers: 64 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(in_features=64, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run the conv stages, flatten, then apply the fully connected layers."""
        # Each convolution here is followed by ReLU and 2x2 max pooling.
        for conv in (self.conv1, self.conv2):
            x = F.max_pool2d(F.relu(conv(x)), 2)
        # Flatten every dimension except the first; -1 lets view() infer the
        # remaining (batch) dimension.
        flat = x.view(-1, self.num_flat_features(x))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the product of all dimensions of ``x`` except the first."""
        count = 1
        for dim in x.shape[1:]:
            count *= dim
        return count

In [40]:
# Instantiate the network and print its layer summary.
net=Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=64, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


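### *下面验证卷积神经网络输出尺寸公式的计算*
- 输出尺寸公式：$H_{out} = \left\lfloor \dfrac{H_{in} - K + 2P}{S} \right\rfloor + 1$（$K$ 为卷积核大小，$P$ 为填充，$S$ 为步长）
- 将forward一步步分解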

In [27]:
# One random sample of shape [1, 1, 16, 16], used to trace the layer output sizes.
# NOTE: the name shadows the built-in input(); it is kept because later cells read it.
input = torch.randn(1, 1, 16, 16)

In [28]:
# Spatial size after conv1 (3x3 kernel, stride 1): (16 - 3 + 2*0)/1 + 1 = 14
net.conv1(input).shape

torch.Size([1, 6, 14, 14])

In [29]:
# First stage in full: conv1 -> ReLU -> 2x2 max pooling (14x14 becomes 7x7).
# `input` is the 1x1x16x16 tensor created earlier in the notebook.
h1 = F.max_pool2d(F.relu(net.conv1(input)), 2)
h1.shape

torch.Size([1, 6, 7, 7])

In [31]:
# Spatial size after conv2 on the pooled 7x7 map: (7 - 3 + 2*0)/1 + 1 = 5
net.conv2(h1).shape

torch.Size([1, 16, 5, 5])

In [30]:
# Second stage in full: conv2 -> ReLU -> 2x2 max pooling.
# conv2 output size: (7 - 3 + 2*0)/1 + 1 = 5; the recorded pooled size is 2x2.
h2=F.max_pool2d(F.relu(net.conv2(h1)),2)
h2.shape

torch.Size([1, 16, 2, 2])

In [33]:
# Flatten all non-batch dimensions, as forward() does before fc1: 16 * 2 * 2 = 64.
h3 = h2.view(-1,net.num_flat_features(h2))
h3.shape

torch.Size([1, 64])

In [41]:
# Full forward pass on `input`, the 1x1x16x16 tensor created earlier in the
# notebook; the result has one row of 10 values.
net(input)

tensor([[-0.0736, -0.0761, -0.0675,  0.0100,  0.0614, -0.0386,  0.0173,  0.0107,
         -0.0829, -0.0315]], grad_fn=<AddmmBackward>)

### 模型中的可训练参数，可使用net.parameters()获取

In [37]:
# Materialise the trainable parameters, then show how many tensors there are
# and the size of the second one (size [6] in the recorded output).
parameters = list(net.parameters())
print(len(parameters), parameters[1].size(), sep="\n")

10
torch.Size([6])


In [44]:
# Fresh random 1x1x16x16 sample pushed through the network; print the result
# followed by its size.
# NOTE: `input` shadows the built-in of the same name; later cells rely on it.
input = torch.randn(1, 1, 16, 16)
out = net(input)
print(out, out.size(), sep="\n")

tensor([[-0.0645, -0.0891, -0.0899,  0.0337,  0.0533, -0.0102,  0.0212,  0.0113,
         -0.1094, -0.0214]], grad_fn=<AddmmBackward>)
torch.Size([1, 10])


In [46]:
# Reset the network's gradients, then backpropagate from `out` using a random
# upstream gradient whose shape (1, 10) matches `out`.
net.zero_grad()
out.backward(torch.randn(1,10))

### 应用nn.MSELoss计算损失的一个例子

In [47]:
# Forward pass whose result is compared against a random target.
output = net(input)
# Draw 10 random values and reshape them to a single row so the target has
# the same (1, 10) shape as the network output.
target = torch.randn(10).view(1, -1)

# Mean-squared-error criterion applied to prediction and target.
criterion = nn.MSELoss()
loss = criterion(output, target)
print(loss)

tensor(0.9490, grad_fn=<MseLossBackward>)


In [48]:
# The autograd node that produced `loss` (MseLossBackward in the recorded output).
print(loss.grad_fn)

<MseLossBackward object at 0x7f9dc8fc5a50>


In [53]:
# Follow the first next_functions entry one step back from the loss node
# (AddmmBackward in the recorded output).
print(loss.grad_fn.next_functions[0][0])

<AddmmBackward object at 0x7f9dc8fe7350>


In [55]:
# Follow the first next_functions entry two steps back from the loss node
# (AccumulateGrad in the recorded output).
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])

<AccumulateGrad object at 0x7f9dc8fe7f90>


### 执行一个反向传播的小例子

In [57]:
# Clear the gradients (PyTorch's gradient-reset call) and show conv1's bias
# gradient before backpropagation.
net.zero_grad()
print('conv1.bias.grad before backward', net.conv1.bias.grad, sep='\n')
# Backpropagate the loss (PyTorch's backward call), then show the same
# gradient again.
loss.backward()
print('conv1.bias.grad after backward', net.conv1.bias.grad, sep='\n')

conv1.bias.grad before backward
tensor([0., 0., 0., 0., 0., 0.])
conv1.bias.grad after backward
tensor([-0.0027,  0.0018,  0.0247, -0.0204, -0.0306,  0.0219])


In [59]:
# Update the weights with plain stochastic gradient descent, written by hand
# in Python: weight = weight - learning_rate * gradient.
# The trailing underscore in sub_ marks an in-place subtraction.
learning_rate = 0.01
# no_grad() keeps the in-place update out of the autograd graph without
# reaching for the legacy .data attribute.
with torch.no_grad():
    for f in net.parameters():
        # A parameter that received no gradient has .grad set to None; skip it.
        if f.grad is None:
            continue
        f.sub_(f.grad * learning_rate)

<generator object Module.parameters at 0x7f9dc8fcf850>

In [62]:
# Print the shape of every parameter tensor. After the loop, `f` stays bound
# to the last parameter, which later cells inspect.
for f in net.parameters():
    print(f.shape)

torch.Size([6, 1, 3, 3])
torch.Size([6])
torch.Size([16, 6, 3, 3])
torch.Size([16])
torch.Size([120, 64])
torch.Size([120])
torch.Size([84, 120])
torch.Size([84])
torch.Size([10, 84])
torch.Size([10])


In [66]:
# Gradient of whichever parameter `f` was last bound to (10 values in the
# recorded output).
f.grad

tensor([-0.2868, -0.0014, -0.0175, -0.2414,  0.2023, -0.1670,  0.0091,  0.3180,
        -0.2582, -0.0458])

In [65]:
# Same gradient accessed through .data (identical values in the recorded output).
f.grad.data

tensor([-0.2868, -0.0014, -0.0175, -0.2414,  0.2023, -0.1670,  0.0091,  0.3180,
        -0.2582, -0.0458])

In [67]:
from torch import optim

# SGD optimizer over the network's parameters with learning rate 0.01.
optimizer = optim.SGD(params=net.parameters(), lr=0.01)
# Clear the gradients through the optimizer.
optimizer.zero_grad()

# Forward pass and loss computation.
output = net(input)
loss = criterion(output, target)

# Backpropagate the loss value.
loss.backward()
# The parameter update itself is a single standard call.
optimizer.step()