In [1]:
import numpy as np
from torch import nn,optim
from torch.autograd import Variable
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch

In [2]:
# 训练集
train_dataset = datasets.MNIST(root='./',
                               train=True,
                               transform=transforms.ToTensor(),
                               download=True)
# 测试集
test_dataset = datasets.MNIST(root='./',
                               train=False,
                               transform=transforms.ToTensor(),
                               download=True)

In [3]:
# 批次大小  想想数据为为什么要分批
batch_size =64
# 装载训练集
train_loader = DataLoader(dataset=train_dataset,
                         batch_size=batch_size,
                         shuffle=True)
# 装载测试集
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=batch_size,
                         shuffle=True)

In [4]:
len(train_loader)
len(test_dataset)

10000

In [5]:
for i,data in enumerate(train_loader):
    inputs,lables = data
    print(inputs.shape)
    print(lables.shape)
    break

torch.Size([64, 1, 28, 28])
torch.Size([64])


In [6]:
len(train_loader)

938

In [7]:
#定义网络结构
class Net(nn.Module):
    def __init__(self):
        super(Net,self).__init__()
        self.layer1 = nn.Sequential(nn.Linear(784,500), nn.Dropout(p=0), nn.Tanh())#p=0表示使用全部的神经元（不使用Dropout），Tanh是激活函数
        self.layer2 = nn.Sequential(nn.Linear(500,300), nn.Dropout(p=0), nn.Tanh())
        self.layer3 = nn.Sequential(nn.Linear(300,10), nn.Softmax(dim=1))
    def forward(self,x):
        #[64,1,28,28]->[64,784]
        x=x.view(x.size()[0],-1)
        x=self.layer1(x)
        x=self.layer2(x)
        x=self.layer3(x)
        return x

In [8]:
LR = 0.5
#定义模型
model = Net()
#定义代价函数 交叉熵
mes_loss = nn.CrossEntropyLoss()
#定义优化器 使用L2正则化
optimizer = optim.SGD(model.parameters(),LR,weight_decay=0.0001)#weight_decay表示正则化系数

In [9]:
#训练模型
def train():
    model.train()#模型的训练状态 Dropout起作用
    for i ,data in enumerate(train_loader):
        #获得一个批次的数据和标签
        inputs,lables = data
        #获得模型预测结果
        out = model(inputs) #[64,10]
        #计算loss 使用交叉熵不需要进行独热编码
        loss = mes_loss(out,lables)
        #梯度清零
        optimizer.zero_grad()
        #计算梯度
        loss.backward()
        #修改权值
        optimizer.step()

In [10]:
def test():
    model.eval()#模型的测试状态 Dropout不起作用
    #获取测试集的准确率
    correct=0
    for i,data in enumerate(test_loader):
        #获得一个批次的数据和标签
        inputs,lables = data
        #获得模型的预测结果 [64,10]
        out = model(inputs)
        #获得最大值以及最大值所在的位置
        _, predicted = torch.max(out,1)
        correct += (predicted==lables).sum()
        
    print("Test acc:{0}".format(correct.item()/len(test_dataset)))
    print(correct)
    
    #获取训练集的准确率
    correct=0
    for i,data in enumerate(train_loader):
        #获得一个批次的数据和标签
        inputs,lables = data
        #获得模型的预测结果 [64,10]
        out = model(inputs)
        #获得最大值以及最大值所在的位置
        _, predicted = torch.max(out,1)
        correct += (predicted==lables).sum()
        
    print("Train acc:{0}".format(correct.item()/len(train_dataset)))
    print(correct)
        

In [11]:
for epoch in range(10):
    print("epoch:",epoch)
    train()
    test()

epoch: 0
Test acc:0.9238
tensor(9238)
Train acc:0.9177833333333333
tensor(55067)
epoch: 1
Test acc:0.9308
tensor(9308)
Train acc:0.9311333333333334
tensor(55868)
epoch: 2
Test acc:0.9374
tensor(9374)
Train acc:0.9421833333333334
tensor(56531)
epoch: 3
Test acc:0.9503
tensor(9503)
Train acc:0.9535833333333333
tensor(57215)
epoch: 4
Test acc:0.9527
tensor(9527)
Train acc:0.9577833333333333
tensor(57467)
epoch: 5
Test acc:0.952
tensor(9520)
Train acc:0.9581833333333334
tensor(57491)
epoch: 6
Test acc:0.948
tensor(9480)
Train acc:0.9520166666666666
tensor(57121)
epoch: 7
Test acc:0.9599
tensor(9599)
Train acc:0.9684333333333334
tensor(58106)
epoch: 8
Test acc:0.9627
tensor(9627)
Train acc:0.9709666666666666
tensor(58258)
epoch: 9
Test acc:0.9645
tensor(9645)
Train acc:0.97195
tensor(58317)
