批量归一化：防止梯度消失做得数值稳定方案
用于输出的激活函数前或输入前
沿着特征维或卷积通道维做归一化（批量归一化）
最初是认为减小了模型内部协变量偏移
后续发现不是，而是认为通过样品的均值方差加入了噪声，控制了模型复杂度
能够加快收敛，但是不影响模型精度

In [1]:
%matplotlib 

Using matplotlib backend: TkAgg


In [2]:
import torch

In [3]:
import torch.nn as nn

In [4]:
import torch.utils.data as data

In [5]:
import torchvision

In [6]:
import matplotlib.pyplot as pyp

In [7]:
import torchvision.transforms as trans

In [8]:
import numpy as np

In [9]:
import matplotlib.animation as animation

In [10]:
import scipy

In [11]:
from torch.nn import functional as F

In [12]:
transformCom=trans.Compose([trans.Resize(96),trans.ToTensor()])

In [13]:
trainSet=torchvision.datasets.FashionMNIST('../',train=True,download=True,transform=transformCom)
testSet=torchvision.datasets.FashionMNIST('../',train=False,download=True,transform=transformCom)

In [14]:
trainLoader=data.DataLoader(trainSet,batch_size=128,shuffle=True)
testLoader=data.DataLoader(testSet,batch_size=128,shuffle=True)

In [15]:
class Inception(nn.Module):
    def __init__(self, in_channels, c1, c2, c3, c4, **kwargs):
        super(Inception, self).__init__(**kwargs)
        self.p1_1 = nn.Conv2d(in_channels, c1, kernel_size=1)
        self.p2_1 = nn.Conv2d(in_channels, c2[0], kernel_size=1)
        self.p2_2 = nn.Conv2d(c2[0], c2[1], kernel_size=3, padding=1)
        self.p3_1 = nn.Conv2d(in_channels, c3[0], kernel_size=1)
        self.p3_2 = nn.Conv2d(c3[0], c3[1], kernel_size=5, padding=2)
        self.p4_1 = nn.MaxPool2d(kernel_size=3, stride=1, padding=1)
        self.p4_2 = nn.Conv2d(in_channels, c4, kernel_size=1)
        self.relu = torch.nn.ReLU()

    def forward(self, x):
        p1 = self.relu(self.p1_1(x))
        p2 = self.relu(self.p2_2(self.relu(self.p2_1(x))))
        p3 = self.relu(self.p3_2(self.relu(self.p3_1(x))))
        p4 = self.relu(self.p4_2(self.p4_1(x)))
        return torch.cat((p1, p2, p3, p4), dim=1)
        
#GoogLe-Net网络，relu激活
b1 = nn.Sequential(nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3),
                   nn.ReLU(), nn.MaxPool2d(kernel_size=3, stride=2,padding=1))

b2 = nn.Sequential(nn.Conv2d(64, 64, kernel_size=1), nn.ReLU(),
                   nn.Conv2d(64, 192, kernel_size=3, padding=1),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b3 = nn.Sequential(Inception(192, 64, (96, 128), (16, 32), 32),
                   Inception(256, 128, (128, 192), (32, 96), 64),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b4 = nn.Sequential(Inception(480, 192, (96, 208), (16, 48), 64),
                   Inception(512, 160, (112, 224), (24, 64), 64),
                   Inception(512, 128, (128, 256), (24, 64), 64),
                   Inception(512, 112, (144, 288), (32, 64), 64),
                   Inception(528, 256, (160, 320), (32, 128), 128),
                   nn.MaxPool2d(kernel_size=3, stride=2, padding=1))

b5 = nn.Sequential(Inception(832, 256, (160, 320), (32, 128), 128),
                   Inception(832, 384, (192, 384), (48, 128), 128),
                   nn.AdaptiveAvgPool2d((1,1)), nn.Flatten())

GoogLeNet = nn.Sequential(b1, b2, b3, b4, b5, nn.Linear(1024, 10))

In [16]:
X = torch.rand(size=(1, 1, 96, 96))
for layer in GoogLeNet:
    X = layer(X)
    print(layer.__class__.__name__, 'output shape:\t', X.shape)

Sequential output shape:	 torch.Size([1, 64, 24, 24])
Sequential output shape:	 torch.Size([1, 192, 12, 12])
Sequential output shape:	 torch.Size([1, 480, 6, 6])
Sequential output shape:	 torch.Size([1, 832, 3, 3])
Sequential output shape:	 torch.Size([1, 1024])
Linear output shape:	 torch.Size([1, 10])


In [17]:
torch.cuda.is_available()


True

In [18]:
GoogLeNet.to(torch.device("cuda:0"))

Sequential(
  (0): Sequential(
    (0): Conv2d(1, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (1): Sequential(
    (0): Conv2d(64, 64, kernel_size=(1, 1), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(64, 192, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (3): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  )
  (2): Sequential(
    (0): Inception(
      (p1_1): Conv2d(192, 64, kernel_size=(1, 1), stride=(1, 1))
      (p2_1): Conv2d(192, 96, kernel_size=(1, 1), stride=(1, 1))
      (p2_2): Conv2d(96, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
      (p3_1): Conv2d(192, 16, kernel_size=(1, 1), stride=(1, 1))
      (p3_2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
      (p4_1): MaxPool2d(kernel_size=3, stride=1, padding=1, dilation=1, ceil_mode=False)
      (p4_2): Conv2d(192, 32, kernel_size=(1, 1),

In [19]:
optim=torch.optim.SGD(GoogLeNet.parameters(),lr=0.1)
loss=torch.nn.CrossEntropyLoss()
loss.cuda()

CrossEntropyLoss()

In [20]:
# lr, num_epochs, batch_size = 0.1, 10, 128
# train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size, resize=96)
# d2l.train_ch6(GoogLeNet, train_iter, test_iter, num_epochs, lr, d2l.try_gpu())

In [21]:
#创建曲线图并初始化
xdata=[]
ydata=[]
xdataTest=[]
ydataTest=[]
fig, ax = pyp.subplots()
line, = ax.plot(xdata, ydata,color='blue')
line2, = ax.plot(xdataTest, ydataTest,color='red')
pyp.title("Loss")
pyp.xlabel("epoch")
pyp.ylabel("Loss")
pyp.grid()
ax.legend((line,line2),('trainLoss','testLoss'))
#动态更新曲线训练函数
num_epoch=22
aveLoss=torch.zeros(num_epoch)
aveLoss=aveLoss.detach().numpy()
aveLossTest=torch.zeros(num_epoch)
# aveLossTest=aveLoss.detach().numpy()
def animationTrain(epoch):
    #每epoch训练代码区#
    #初始化参数为均匀分布，教训：初始化参数对训练是否收敛影响巨大，初始化为0完全不收敛
    def init_weights(m):
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    GoogLeNet.apply(init_weights)
    sumLoss=0
    trainnum=0;
    if epoch>0:
        for trainData,trainLabel in trainLoader:
            optim.zero_grad()
            Y=trainLabel.to(torch.device("cuda:0"))
            X=trainData.to(torch.device("cuda:0"))
            YHAT=GoogLeNet(X)
            comLoss=loss(YHAT,Y)
            comLoss.backward()
            optim.step()
            sumLoss=comLoss+sumLoss
            trainnum=trainnum+1
        aveLoss[epoch-1]=(sumLoss.to(torch.device("cpu"))/trainnum)
        sumLoss=0
        trainnum=0
        with torch.no_grad():
            for trainData,trainLabel in testLoader:
                Y=trainLabel.to(torch.device("cuda:0"))
                X=trainData.to(torch.device("cuda:0"))
                YHAT=GoogLeNet(X)
                comLoss=loss(YHAT,Y)
                sumLoss=comLoss+sumLoss
                trainnum=trainnum+1
        aveLossTest[epoch-1]=(sumLoss.to(torch.device("cpu"))/trainnum)   
        print(aveLoss)
        #图表数据更新#
        xdata.append(epoch-1)
        ydata.append(aveLoss[epoch-1])
        xdataTest.append(epoch-1)
        ydataTest.append(aveLossTest[epoch-1])
        line.set_xdata(xdata)
        line.set_ydata(ydata)
        line2.set_xdata(xdataTest)
        line2.set_ydata(ydataTest)
        ax.set_xlim(0, max(xdata))
        ax.set_ylim(0, max(ydata))
        #print(aveLoss[epoch])
        return line,line2,
#创建动画对象并显示，显示过程逐次调用Trian函数
anim = animation.FuncAnimation(fig, animationTrain, interval=20, blit=False,repeat=False,frames=num_epoch)
pyp.show()     


[1.9666845 0.        0.        0.        0.        0.        0.
 0.        0.        0.        0.        0.        0.        0.
 0.        0.        0.        0.        0.        0.        0.
 0.       ]


  ax.set_xlim(0, max(xdata))


[1.9666845 1.8212733 0.        0.        0.        0.        0.
 0.        0.        0.        0.        0.        0.        0.
 0.        0.        0.        0.        0.        0.        0.
 0.       ]
[1.9666845 1.8212733 1.6025636 0.        0.        0.        0.
 0.        0.        0.        0.        0.        0.        0.
 0.        0.        0.        0.        0.        0.        0.
 0.       ]
[1.9666845 1.8212733 1.6025636 1.5026474 0.        0.        0.
 0.        0.        0.        0.        0.        0.        0.
 0.        0.        0.        0.        0.        0.        0.
 0.       ]
[1.9666845 1.8212733 1.6025636 1.5026474 1.7259643 0.        0.
 0.        0.        0.        0.        0.        0.        0.
 0.        0.        0.        0.        0.        0.        0.
 0.       ]
[1.9666845 1.8212733 1.6025636 1.5026474 1.7259643 1.7607405 0.
 0.        0.        0.        0.        0.        0.        0.
 0.        0.        0.        0.        0.        0.   