In [1]:
import os

import numpy as np
import torch
from torch import nn,optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# MNIST training split: downloaded into the current directory if missing,
# with every image converted to a tensor by ToTensor().
train_dataset = datasets.MNIST(
    './',
    train=True,
    download=True,
    transform=transforms.ToTensor(),
)
# MNIST test split, prepared the same way.
test_dataset = datasets.MNIST(
    './',
    train=False,
    download=True,
    transform=transforms.ToTensor(),
)

In [3]:
# Batch size. (Exercise: think about why the data is fed in batches.)
batch_size =64
# Training loader: reshuffled every epoch so batches differ between epochs.
train_loader = DataLoader(dataset=train_dataset,
                         batch_size=batch_size,
                         shuffle=True)
# Test loader: evaluation only counts correct predictions, so sample order is
# irrelevant; keep it unshuffled so batches come out in a fixed order.
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=False)

In [4]:
# Sizes of the two datasets (in samples) followed by the two loaders (in batches).
for sized in (train_dataset, test_dataset, train_loader, test_loader):
    print(len(sized))

60000
10000
938
157


In [5]:
# Look at the shapes of the first training batch only, then stop iterating.
for inputs, lables in train_loader:
    print(inputs.shape)
    print(lables.shape)
    break

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [6]:
# Number of batches the training loader yields per epoch.
len(train_loader)

938

In [7]:
# Network definition
class Net(nn.Module):
    """Small CNN classifier: two conv/pool stages followed by two linear stages.

    Takes a 4-D batch of single-channel 28x28 images ([N, 1, 28, 28]) and
    returns one row of 10 softmax probabilities per image ([N, 10]).
    """

    def __init__(self):
        super().__init__()
        # Stage 1: [N,1,28,28] -> conv 5x5, pad 2 -> [N,32,28,28] -> pool 2x2 -> [N,32,14,14]
        self.conv1 = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=32, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Stage 2: [N,32,14,14] -> conv 5x5, pad 2 -> [N,64,14,14] -> pool 2x2 -> [N,64,7,7]
        self.conv2 = nn.Sequential(
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5, stride=1, padding=2),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        # Hidden fully connected stage: [N,64*7*7] -> [N,1000], with dropout.
        self.fc1 = nn.Sequential(
            nn.Linear(in_features=64 * 7 * 7, out_features=1000),
            nn.Dropout(p=0.5),
            nn.ReLU(),
        )
        # Output stage: [N,1000] -> [N,10], normalised across the class dimension.
        self.fc2 = nn.Sequential(
            nn.Linear(in_features=1000, out_features=10),
            nn.Softmax(dim=1),
        )

    def forward(self, x):
        """Return class probabilities for a batch shaped [N, 1, 28, 28].

        N is the batch size, 1 the channel count (greyscale; colour would be 3)
        and 28x28 the image resolution. The conv layers need this 4-D layout.
        """
        features = self.conv2(self.conv1(x))
        # The linear layers expect one flat vector per sample: [N,64,7,7] -> [N,64*7*7].
        flat = features.view(features.size(0), -1)
        return self.fc2(self.fc1(flat))

In [8]:
# Learning rate; Adam is usually run with a fairly small value.
LR = 0.0003
# Model instance
model = Net()
# Cost function: mean squared error (compared against one-hot targets in train()).
mes_loss = nn.MSELoss()
# Optimizer over all model parameters
optimizer = optim.Adam(params=model.parameters(), lr=LR)

In [9]:
# Train the model
def train():
    """Run one training epoch over ``train_loader``.

    Uses the notebook-level ``model``, ``mes_loss`` and ``optimizer``; each
    batch does a forward pass, an MSE loss against one-hot targets, and one
    optimizer step. Returns nothing.
    """
    # Put the model in training mode explicitly so dropout is active no matter
    # what mode an earlier cell left it in.
    model.train()
    for inputs, labels in train_loader:
        # Forward pass: one row of class probabilities per sample, [N, n_classes].
        out = model(inputs)
        # MSELoss needs a target shaped like `out`, so one-hot encode the labels.
        # scatter_(dim, index, value): along dim 1, write `value` at the column
        # given by `index`; labels are reshaped [N] -> [N, 1] to serve as index.
        # zeros_like(out) keeps the width, dtype and device in step with the
        # model output instead of hard-coding them.
        one_hot = torch.zeros_like(out).scatter_(1, labels.reshape(-1, 1), 1)
        # Compute the loss
        loss = mes_loss(out, one_hot)
        # Clear gradients left over from the previous step
        optimizer.zero_grad()
        # Back-propagate
        loss.backward()
        # Update the weights
        optimizer.step()

In [10]:
def test():
    """Evaluate ``model`` on ``test_loader`` and print the accuracy.

    Prints the fraction of correctly classified samples (relative to
    ``len(test_dataset)``) followed by the raw count of correct predictions.
    The model is switched to eval mode for the duration of the evaluation and
    put back into whatever mode it was in beforehand.
    """
    # Remember the current mode so a following train() call is unaffected.
    was_training = model.training
    # Eval mode disables dropout; without it predictions are randomly perturbed.
    model.eval()
    correct=0
    try:
        # No gradients are needed for evaluation; skip building the autograd graph.
        with torch.no_grad():
            for inputs, labels in test_loader:
                # Model predictions for this batch, [N, 10]
                out = model(inputs)
                # torch.max over dim 1 returns (max value, index of max);
                # the index is the predicted class.
                _, predicted = torch.max(out,1)
                correct += (predicted==labels).sum()
    finally:
        model.train(was_training)

    print("Test acc:{0}".format(correct.item()/len(test_dataset)))
    print(correct)
        

In [11]:
# Two epochs: each one is a full training pass followed by a test-set evaluation.
for epoch in range(2):
    print("epoch:",epoch)
    train()
    test()

epoch: 0
Test acc:0.9783
tensor(9783)
epoch: 1
Test acc:0.9852
tensor(9852)


In [13]:
# Save the model: only the trained weights (state_dict) are written.
# torch.save does not create missing parent directories, so make sure the
# target directory exists first.
os.makedirs('model', exist_ok=True)
torch.save(model.state_dict(),'model/minst_cnn_model.pth')