## 代码重复部分

In [1]:
import torch
import torch.nn as nn
import torchvision 
import torch.utils.data as Data
 
torch.manual_seed(1)  # fixed RNG seed so that runs can be reproduced

# Hyperparameters
EPOCH = 1              # number of full passes over the training loader
LR = 0.001             # learning rate handed to the optimizers
BATCH_SIZE = 50        # number of samples fed to the model per mini-batch
DOWNLOAD_MNIST = True  # forwarded as `download=` when building the training set

# MNIST handwritten-digit training split, with ToTensor() as the per-image transform.
train_data = torchvision.datasets.MNIST(
    root='./MNIST/',
    train=True,
    transform=torchvision.transforms.ToTensor(),
    download=DOWNLOAD_MNIST,
)

# Test split (train=False). No transform is attached; the raw tensors are used below.
test_data = torchvision.datasets.MNIST(train=False, root='./MNIST/')

# Shuffled mini-batches of 50 single-channel 28x28 images: (50, 1, 28, 28).
train_loader = Data.DataLoader(train_data, batch_size=BATCH_SIZE, shuffle=True)

# Only the first 2000 test samples are used for evaluation.
# Images: (2000, 28, 28) -> (2000, 1, 28, 28), converted to float and divided by 255.
test_x = test_data.data[:2000].unsqueeze(1).float() / 255.0
test_y = test_data.targets[:2000]

  from . import _distributor_init


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./MNIST/MNIST\raw\train-images-idx3-ubyte.gz


100.1%

Extracting ./MNIST/MNIST\raw\train-images-idx3-ubyte.gz to ./MNIST/MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./MNIST/MNIST\raw\train-labels-idx1-ubyte.gz


113.5%

Extracting ./MNIST/MNIST\raw\train-labels-idx1-ubyte.gz to ./MNIST/MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./MNIST/MNIST\raw\t10k-images-idx3-ubyte.gz


100.4%

Extracting ./MNIST/MNIST\raw\t10k-images-idx3-ubyte.gz to ./MNIST/MNIST\raw
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./MNIST/MNIST\raw\t10k-labels-idx1-ubyte.gz


180.4%

Extracting ./MNIST/MNIST\raw\t10k-labels-idx1-ubyte.gz to ./MNIST/MNIST\raw
Processing...
Done!


## 定义网络结构

In [3]:
class CNN(nn.Module):
    """Small convolutional classifier: two conv/ReLU/max-pool stages, then a linear layer.

    The shape comments below assume a (batch, 1, 28, 28) input, which is the
    size the final ``32 * 7 * 7`` linear layer is built for.
    """

    def __init__(self):
        super().__init__()

        # (1, 28, 28) -> conv -> (16, 28, 28) -> pool -> (16, 14, 14).
        # With stride=1, padding=(kernel_size - 1) / 2 keeps height/width unchanged.
        self.conv1 = nn.Sequential(
            nn.Conv2d(1, 16, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # (16, 14, 14) -> conv -> (32, 14, 14) -> pool -> (32, 7, 7).
        self.conv2 = nn.Sequential(
            nn.Conv2d(16, 32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )

        # Flattened feature map -> 10 output scores.
        self.out = nn.Linear(32 * 7 * 7, 10)

    def forward(self, x):
        """Return the (batch_size, 10) output scores for a batch of images ``x``."""
        features = self.conv2(self.conv1(x))         # (batch_size, 32, 7, 7)
        flat = features.view(features.size(0), -1)   # (batch_size, 32 * 7 * 7)
        return self.out(flat)                        # (batch_size, 10)


cnn = CNN()

## Adam（原代码使用的优化器，也是常用的优化器）

In [5]:
# Train the shared `cnn` with Adam for EPOCH passes over `train_loader`, then
# compare predictions on the first ten test samples with their labels.
# NOTE: `cnn` is the single module-level model, so this cell continues from
# whatever weights previously executed cells left in it.
optimizer = torch.optim.Adam(cnn.parameters(), lr=LR)  # optimizer over all model parameters
loss_func = nn.CrossEntropyLoss()  # loss between model outputs and integer labels

for epoch in range(EPOCH):
    for step, (batch_x, batch_y) in enumerate(train_loader):
        logits = cnn(batch_x)
        loss = loss_func(logits, batch_y)
        optimizer.zero_grad()  # clear the gradients left over from the previous step
        loss.backward()        # backpropagate to populate the parameter gradients
        optimizer.step()       # apply one parameter update (this does not change the learning rate)

        if step % 50 == 0:
            # .item() extracts the scalar loss as a Python float.
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item())


# Print the first ten test predictions next to the true labels.
# Inference only, so no autograd graph is needed.
with torch.no_grad():
    test_output = cnn(test_x[:10])
pred_y = test_output.argmax(dim=1).numpy()  # index of the largest score in each row
print(pred_y, 'prediction number')
print(test_y[:10].numpy(), 'real number')

Epoch:  0 | train loss: 0.0219
Epoch:  0 | train loss: 0.0763
Epoch:  0 | train loss: 0.0519
Epoch:  0 | train loss: 0.0490
Epoch:  0 | train loss: 0.0113
Epoch:  0 | train loss: 0.0628
Epoch:  0 | train loss: 0.0354
Epoch:  0 | train loss: 0.0361
Epoch:  0 | train loss: 0.2080
Epoch:  0 | train loss: 0.0424
Epoch:  0 | train loss: 0.0497
Epoch:  0 | train loss: 0.0305
Epoch:  0 | train loss: 0.0716
Epoch:  0 | train loss: 0.0401
Epoch:  0 | train loss: 0.0052
Epoch:  0 | train loss: 0.0431
Epoch:  0 | train loss: 0.0015
Epoch:  0 | train loss: 0.0311
Epoch:  0 | train loss: 0.0341
Epoch:  0 | train loss: 0.0399
Epoch:  0 | train loss: 0.0017
Epoch:  0 | train loss: 0.0269
Epoch:  0 | train loss: 0.0291
Epoch:  0 | train loss: 0.0261
[7 2 1 0 4 1 4 9 5 9] prediction number
[7 2 1 0 4 1 4 9 5 9] real number


## SGD部分

### SGD

In [6]:
# Train the shared `cnn` with plain SGD (no momentum argument) for EPOCH passes
# over `train_loader`, then compare predictions on the first ten test samples
# with their labels.
# NOTE: `cnn` is the single module-level model, so this cell continues from
# whatever weights previously executed cells left in it.
optimizer = torch.optim.SGD(cnn.parameters(), lr=LR)  # optimizer over all model parameters
loss_func = nn.CrossEntropyLoss()  # loss between model outputs and integer labels

for epoch in range(EPOCH):
    for step, (batch_x, batch_y) in enumerate(train_loader):
        logits = cnn(batch_x)
        loss = loss_func(logits, batch_y)
        optimizer.zero_grad()  # clear the gradients left over from the previous step
        loss.backward()        # backpropagate to populate the parameter gradients
        optimizer.step()       # apply one parameter update (this does not change the learning rate)

        if step % 50 == 0:
            # .item() extracts the scalar loss as a Python float.
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item())


# Print the first ten test predictions next to the true labels.
# Inference only, so no autograd graph is needed.
with torch.no_grad():
    test_output = cnn(test_x[:10])
pred_y = test_output.argmax(dim=1).numpy()  # index of the largest score in each row
print(pred_y, 'prediction number')
print(test_y[:10].numpy(), 'real number')

Epoch:  0 | train loss: 0.0358
Epoch:  0 | train loss: 0.0203
Epoch:  0 | train loss: 0.0930
Epoch:  0 | train loss: 0.0050
Epoch:  0 | train loss: 0.0183
Epoch:  0 | train loss: 0.0031
Epoch:  0 | train loss: 0.0385
Epoch:  0 | train loss: 0.0474
Epoch:  0 | train loss: 0.0311
Epoch:  0 | train loss: 0.0766
Epoch:  0 | train loss: 0.0707
Epoch:  0 | train loss: 0.0133
Epoch:  0 | train loss: 0.0020
Epoch:  0 | train loss: 0.1394
Epoch:  0 | train loss: 0.0339
Epoch:  0 | train loss: 0.0578
Epoch:  0 | train loss: 0.1446
Epoch:  0 | train loss: 0.0105
Epoch:  0 | train loss: 0.0718
Epoch:  0 | train loss: 0.0155
Epoch:  0 | train loss: 0.0072
Epoch:  0 | train loss: 0.0538
Epoch:  0 | train loss: 0.0438
Epoch:  0 | train loss: 0.0042
[7 2 1 0 4 1 4 9 5 9] prediction number
[7 2 1 0 4 1 4 9 5 9] real number


### SGD-M

In [7]:
# Train the shared `cnn` with SGD plus momentum (0.9) for EPOCH passes over
# `train_loader`, then compare predictions on the first ten test samples with
# their labels.
# NOTE: `cnn` is the single module-level model, so this cell continues from
# whatever weights previously executed cells left in it.
optimizer = torch.optim.SGD(cnn.parameters(), lr=LR, momentum=0.9)  # optimizer over all model parameters
loss_func = nn.CrossEntropyLoss()  # loss between model outputs and integer labels

for epoch in range(EPOCH):
    for step, (batch_x, batch_y) in enumerate(train_loader):
        logits = cnn(batch_x)
        loss = loss_func(logits, batch_y)
        optimizer.zero_grad()  # clear the gradients left over from the previous step
        loss.backward()        # backpropagate to populate the parameter gradients
        optimizer.step()       # apply one parameter update (this does not change the learning rate)

        if step % 50 == 0:
            # .item() extracts the scalar loss as a Python float.
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item())


# Print the first ten test predictions next to the true labels.
# Inference only, so no autograd graph is needed.
with torch.no_grad():
    test_output = cnn(test_x[:10])
pred_y = test_output.argmax(dim=1).numpy()  # index of the largest score in each row
print(pred_y, 'prediction number')
print(test_y[:10].numpy(), 'real number')

Epoch:  0 | train loss: 0.0242
Epoch:  0 | train loss: 0.0265
Epoch:  0 | train loss: 0.0045
Epoch:  0 | train loss: 0.0529
Epoch:  0 | train loss: 0.0363
Epoch:  0 | train loss: 0.0063
Epoch:  0 | train loss: 0.0937
Epoch:  0 | train loss: 0.0706
Epoch:  0 | train loss: 0.0135
Epoch:  0 | train loss: 0.0084
Epoch:  0 | train loss: 0.0038
Epoch:  0 | train loss: 0.0059
Epoch:  0 | train loss: 0.0231
Epoch:  0 | train loss: 0.0129
Epoch:  0 | train loss: 0.0309
Epoch:  0 | train loss: 0.0122
Epoch:  0 | train loss: 0.0036
Epoch:  0 | train loss: 0.0110
Epoch:  0 | train loss: 0.0458
Epoch:  0 | train loss: 0.0034
Epoch:  0 | train loss: 0.0059
Epoch:  0 | train loss: 0.0745
Epoch:  0 | train loss: 0.0289
Epoch:  0 | train loss: 0.0357
[7 2 1 0 4 1 4 9 5 9] prediction number
[7 2 1 0 4 1 4 9 5 9] real number


### NAG

In [8]:
# Train the shared `cnn` with Nesterov-momentum SGD (momentum=0.9,
# nesterov=True) for EPOCH passes over `train_loader`, then compare predictions
# on the first ten test samples with their labels.
# NOTE: `cnn` is the single module-level model, so this cell continues from
# whatever weights previously executed cells left in it.
optimizer = torch.optim.SGD(cnn.parameters(), lr=LR, momentum=0.9, nesterov=True)  # optimizer over all model parameters
loss_func = nn.CrossEntropyLoss()  # loss between model outputs and integer labels

for epoch in range(EPOCH):
    for step, (batch_x, batch_y) in enumerate(train_loader):
        logits = cnn(batch_x)
        loss = loss_func(logits, batch_y)
        optimizer.zero_grad()  # clear the gradients left over from the previous step
        loss.backward()        # backpropagate to populate the parameter gradients
        optimizer.step()       # apply one parameter update (this does not change the learning rate)

        if step % 50 == 0:
            # .item() extracts the scalar loss as a Python float.
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item())


# Print the first ten test predictions next to the true labels.
# Inference only, so no autograd graph is needed.
with torch.no_grad():
    test_output = cnn(test_x[:10])
pred_y = test_output.argmax(dim=1).numpy()  # index of the largest score in each row
print(pred_y, 'prediction number')
print(test_y[:10].numpy(), 'real number')

Epoch:  0 | train loss: 0.0348
Epoch:  0 | train loss: 0.0679
Epoch:  0 | train loss: 0.0084
Epoch:  0 | train loss: 0.0175
Epoch:  0 | train loss: 0.0277
Epoch:  0 | train loss: 0.0159
Epoch:  0 | train loss: 0.0111
Epoch:  0 | train loss: 0.0153
Epoch:  0 | train loss: 0.0116
Epoch:  0 | train loss: 0.0022
Epoch:  0 | train loss: 0.0047
Epoch:  0 | train loss: 0.0055
Epoch:  0 | train loss: 0.0037
Epoch:  0 | train loss: 0.0274
Epoch:  0 | train loss: 0.0155
Epoch:  0 | train loss: 0.0882
Epoch:  0 | train loss: 0.0111
Epoch:  0 | train loss: 0.0129
Epoch:  0 | train loss: 0.1160
Epoch:  0 | train loss: 0.0247
Epoch:  0 | train loss: 0.0587
Epoch:  0 | train loss: 0.0425
Epoch:  0 | train loss: 0.0020
Epoch:  0 | train loss: 0.0032
[7 2 1 0 4 1 4 9 5 9] prediction number
[7 2 1 0 4 1 4 9 5 9] real number


## Adagrad

In [9]:
# Train the shared `cnn` with Adagrad for EPOCH passes over `train_loader`,
# then compare predictions on the first ten test samples with their labels.
# NOTE: `cnn` is the single module-level model, so this cell continues from
# whatever weights previously executed cells left in it.
optimizer = torch.optim.Adagrad(cnn.parameters(), lr=LR)  # optimizer over all model parameters
loss_func = nn.CrossEntropyLoss()  # loss between model outputs and integer labels

for epoch in range(EPOCH):
    for step, (batch_x, batch_y) in enumerate(train_loader):
        logits = cnn(batch_x)
        loss = loss_func(logits, batch_y)
        optimizer.zero_grad()  # clear the gradients left over from the previous step
        loss.backward()        # backpropagate to populate the parameter gradients
        optimizer.step()       # apply one parameter update (this does not change the learning rate)

        if step % 50 == 0:
            # .item() extracts the scalar loss as a Python float.
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item())


# Print the first ten test predictions next to the true labels.
# Inference only, so no autograd graph is needed.
with torch.no_grad():
    test_output = cnn(test_x[:10])
pred_y = test_output.argmax(dim=1).numpy()  # index of the largest score in each row
print(pred_y, 'prediction number')
print(test_y[:10].numpy(), 'real number')

Epoch:  0 | train loss: 0.0019
Epoch:  0 | train loss: 0.0148
Epoch:  0 | train loss: 0.0064
Epoch:  0 | train loss: 0.0221
Epoch:  0 | train loss: 0.0038
Epoch:  0 | train loss: 0.0312
Epoch:  0 | train loss: 0.0347
Epoch:  0 | train loss: 0.0158
Epoch:  0 | train loss: 0.0020
Epoch:  0 | train loss: 0.0872
Epoch:  0 | train loss: 0.0218
Epoch:  0 | train loss: 0.0405
Epoch:  0 | train loss: 0.0017
Epoch:  0 | train loss: 0.0030
Epoch:  0 | train loss: 0.0502
Epoch:  0 | train loss: 0.0018
Epoch:  0 | train loss: 0.0041
Epoch:  0 | train loss: 0.0145
Epoch:  0 | train loss: 0.0045
Epoch:  0 | train loss: 0.0052
Epoch:  0 | train loss: 0.0064
Epoch:  0 | train loss: 0.0013
Epoch:  0 | train loss: 0.0360
Epoch:  0 | train loss: 0.0174
[7 2 1 0 4 1 4 9 5 9] prediction number
[7 2 1 0 4 1 4 9 5 9] real number


## RMSprop（类名应为 `torch.optim.RMSprop`；下方的 AttributeError 是把类名误写成 `RMSProp` 所致，而非 torch 版本过低，修正后有兴趣可自行重新运行测试）

In [10]:
# Train the shared `cnn` with RMSprop (alpha=0.9) for EPOCH passes over
# `train_loader`, then compare predictions on the first ten test samples with
# their labels. The class is spelled `RMSprop`; `torch.optim.RMSProp` does not exist.
# NOTE: `cnn` is the single module-level model, so this cell continues from
# whatever weights previously executed cells left in it.
optimizer = torch.optim.RMSprop(cnn.parameters(), lr=LR, alpha=0.9)  # uses the shared LR constant like the other cells
loss_func = nn.CrossEntropyLoss()  # loss between model outputs and integer labels

for epoch in range(EPOCH):
    for step, (batch_x, batch_y) in enumerate(train_loader):
        logits = cnn(batch_x)
        loss = loss_func(logits, batch_y)
        optimizer.zero_grad()  # clear the gradients left over from the previous step
        loss.backward()        # backpropagate to populate the parameter gradients
        optimizer.step()       # apply one parameter update (this does not change the learning rate)

        if step % 50 == 0:
            # .item() extracts the scalar loss as a Python float.
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item())


# Print the first ten test predictions next to the true labels.
# Inference only, so no autograd graph is needed.
with torch.no_grad():
    test_output = cnn(test_x[:10])
pred_y = test_output.argmax(dim=1).numpy()  # index of the largest score in each row
print(pred_y, 'prediction number')
print(test_y[:10].numpy(), 'real number')

AttributeError: module 'torch.optim' has no attribute 'RMSProp'

## Adadelta

In [12]:
# Train the shared `cnn` with Adadelta for EPOCH passes over `train_loader`,
# then compare predictions on the first ten test samples with their labels.
# NOTE: `cnn` is the single module-level model, so this cell continues from
# whatever weights previously executed cells left in it.
# NOTE(review): Adadelta's `lr` scales the computed delta and defaults to 1.0 in
# PyTorch; lr=LR is far below that default - confirm this is intended.
optimizer = torch.optim.Adadelta(cnn.parameters(), lr=LR)  # optimizer over all model parameters
loss_func = nn.CrossEntropyLoss()  # loss between model outputs and integer labels

for epoch in range(EPOCH):
    for step, (batch_x, batch_y) in enumerate(train_loader):
        logits = cnn(batch_x)
        loss = loss_func(logits, batch_y)
        optimizer.zero_grad()  # clear the gradients left over from the previous step
        loss.backward()        # backpropagate to populate the parameter gradients
        optimizer.step()       # apply one parameter update (this does not change the learning rate)

        if step % 50 == 0:
            # .item() extracts the scalar loss as a Python float.
            print('Epoch: ', epoch, '| train loss: %.4f' % loss.item())


# Print the first ten test predictions next to the true labels.
# Inference only, so no autograd graph is needed.
with torch.no_grad():
    test_output = cnn(test_x[:10])
pred_y = test_output.argmax(dim=1).numpy()  # index of the largest score in each row
print(pred_y, 'prediction number')
print(test_y[:10].numpy(), 'real number')

Epoch:  0 | train loss: 0.0145
Epoch:  0 | train loss: 0.0014
Epoch:  0 | train loss: 0.0084
Epoch:  0 | train loss: 0.0458
Epoch:  0 | train loss: 0.0735
Epoch:  0 | train loss: 0.0078
Epoch:  0 | train loss: 0.0141
Epoch:  0 | train loss: 0.0337
Epoch:  0 | train loss: 0.0274
Epoch:  0 | train loss: 0.0098
Epoch:  0 | train loss: 0.0009
Epoch:  0 | train loss: 0.0048
Epoch:  0 | train loss: 0.0045
Epoch:  0 | train loss: 0.0023
Epoch:  0 | train loss: 0.0598
Epoch:  0 | train loss: 0.0082
Epoch:  0 | train loss: 0.0009
Epoch:  0 | train loss: 0.0577
Epoch:  0 | train loss: 0.0039
Epoch:  0 | train loss: 0.0447
Epoch:  0 | train loss: 0.0019
Epoch:  0 | train loss: 0.0038
Epoch:  0 | train loss: 0.0063
Epoch:  0 | train loss: 0.0139
[7 2 1 0 4 1 4 9 5 9] prediction number
[7 2 1 0 4 1 4 9 5 9] real number
