In [14]:
import torch
from torch import nn, optim
from torch.autograd import Variable
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision import transforms
from logging import Logger

In [17]:
# Hyperparameters shared by the data loaders, the optimizer and the training loop.
batch_size = 128      # samples per mini-batch
learning_rate = 0.01  # step size handed to the optimizer
num_epochs = 20       # number of full passes over the training set

In [3]:
def to_np(x):
    """Convert a tensor (or Variable) to a NumPy array.

    Args:
        x: object exposing ``.cpu()`` whose result exposes ``.data``, e.g. a
            ``torch.Tensor``.

    Returns:
        The result of calling ``.numpy()`` on ``x.cpu().data``.
    """
    on_host = x.cpu()
    # Going through ``.data`` lets ``.numpy()`` work on tensors that require grad.
    raw_values = on_host.data
    return raw_values.numpy()

## Load MNIST Dataset

In [6]:
# Load the MNIST handwritten-digit dataset; every image is converted to a tensor.
# Only the training split asks for a download; the test split reads from the
# same root directory.
data_root = './data'

train_dataset = datasets.MNIST(
    root=data_root,
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)
test_dataset = datasets.MNIST(
    root=data_root,
    train=False,
    transform=transforms.ToTensor(),
)

# Training batches are reshuffled on every pass; test batches keep dataset order.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

## CNN model

In [7]:
class CNN(nn.Module):
    """Small convolutional classifier: two conv/ReLU/max-pool stages and a linear head.

    The first linear layer takes 400 input features. With 28x28 inputs the
    feature extractor yields 16 channels of 5x5 (28 -> 28 -> 14 -> 10 -> 5),
    i.e. 16 * 5 * 5 = 400, so other spatial sizes will not fit the head.

    NOTE(review): the three linear layers are stacked with no activation
    between them, so the head as a whole is a single affine map. Confirm
    whether that is intended before changing it; inserting layers would
    shift the ``fc.*`` keys of saved state dicts.

    Args:
        in_dim: number of channels of the input images.
        n_class: number of output scores per sample.
    """

    def __init__(self, in_dim, n_class):
        super().__init__()
        # Feature extractor. Layers are created in the same order as they are
        # applied, so seeded parameter initialisation stays reproducible.
        feature_layers = [
            nn.Conv2d(in_dim, 6, kernel_size=3, stride=1, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(6, 16, kernel_size=5, stride=1, padding=0),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.conv = nn.Sequential(*feature_layers)

        # Classification head: 400 -> 120 -> 84 -> n_class.
        head_layers = [
            nn.Linear(400, 120),
            nn.Linear(120, 84),
            nn.Linear(84, n_class),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, x):
        """Return one row of ``n_class`` scores per sample in ``x``."""
        features = self.conv(x)
        # Collapse everything except the batch dimension before the linear head.
        flattened = features.view(features.size(0), -1)
        return self.fc(flattened)

## Train

In [15]:
# Build the classifier (1 input channel, 10 classes) and move it to the GPU
# when CUDA is available.
use_gpu = torch.cuda.is_available()
model = CNN(in_dim=1, n_class=10)
if use_gpu:
    model = model.cuda()

# Loss function and optimizer.
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=learning_rate)

# NOTE(review): Logger is imported from the standard-library `logging` module
# at the top of this notebook, so './logs' becomes the logger's *name*, not an
# output directory. Presumably a summary-writer style logger was intended;
# confirm the import.
logger = Logger('./logs')

In [19]:
# Training: for every epoch, run one optimisation pass over the training set
# and then measure loss and accuracy on the test set.
for epoch in range(num_epochs):
    print('epoch {}'.format(epoch + 1))
    print('*' * 10)

    # model.eval() is called at the end of every epoch, so training mode has
    # to be restored explicitly before each training pass.
    model.train()
    running_loss = 0.0
    running_acc = 0.0
    num_seen = 0  # samples processed so far in this epoch
    for i, (img, label) in enumerate(train_loader, 1):
        if use_gpu:
            img = img.cuda()
            label = label.cuda()

        # Forward pass
        out = model(img)
        loss = criterion(out, label)

        # The loss is multiplied by the batch size so that dividing the
        # running sum by a sample count gives a per-sample average.
        # .item() extracts the Python number from the 0-dim tensor;
        # indexing with [0] fails on current PyTorch.
        batch_len = label.size(0)
        running_loss += loss.item() * batch_len
        _, pred = torch.max(out, 1)
        running_acc += (pred == label).sum().item()
        num_seen += batch_len

        # Backward pass and parameter update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if i % 300 == 0:
            # Divide by the samples actually seen rather than batch_size * i,
            # which over-counts if any batch is smaller than batch_size.
            print('[{}/{}] Loss: {:.6f}, Acc: {:.6f}'.format(
                epoch + 1, num_epochs, running_loss / num_seen,
                running_acc / num_seen))
    print('Finish {} epoch, Loss: {:.6f}, Acc: {:.6f}'.format(
        epoch + 1, running_loss / (len(train_dataset)), running_acc / (len(
            train_dataset))))

    # Evaluation pass over the test set.
    model.eval()
    eval_loss = 0.0
    eval_acc = 0
    # torch.no_grad() replaces the removed `volatile=True` flag: no autograd
    # graph is recorded while evaluating.
    with torch.no_grad():
        for img, label in test_loader:
            if use_gpu:
                img = img.cuda()
                label = label.cuda()
            out = model(img)
            loss = criterion(out, label)
            eval_loss += loss.item() * label.size(0)
            _, pred = torch.max(out, 1)
            eval_acc += (pred == label).sum().item()
    print('Test Loss: {:.6f}, Acc: {:.6f}'.format(eval_loss / (len(
        test_dataset)), eval_acc / (len(test_dataset))))
    print()

epoch 1
**********
[1/20] Loss: 2.282881, Acc: 0.216667
Finish 1 epoch, Loss: 2.218296, Acc: 0.333467
Test Loss: 1.754201, Acc: 0.640700

epoch 2
**********
[2/20] Loss: 0.849530, Acc: 0.783073
Finish 2 epoch, Loss: 0.704154, Acc: 0.814500
Test Loss: 0.383454, Acc: 0.884900

epoch 3
**********
[3/20] Loss: 0.355583, Acc: 0.894271
Finish 3 epoch, Loss: 0.335307, Acc: 0.899767
Test Loss: 0.267310, Acc: 0.921700

epoch 4
**********
[4/20] Loss: 0.265501, Acc: 0.918906
Finish 4 epoch, Loss: 0.255169, Acc: 0.922150
Test Loss: 0.210379, Acc: 0.934500

epoch 5
**********
[5/20] Loss: 0.207444, Acc: 0.936693
Finish 5 epoch, Loss: 0.202946, Acc: 0.938483
Test Loss: 0.172802, Acc: 0.946800

epoch 6
**********
[6/20] Loss: 0.175132, Acc: 0.947292
Finish 6 epoch, Loss: 0.167988, Acc: 0.949467
Test Loss: 0.142982, Acc: 0.954700

epoch 7
**********
[7/20] Loss: 0.150699, Acc: 0.954740
Finish 7 epoch, Loss: 0.145218, Acc: 0.956633
Test Loss: 0.124926, Acc: 0.962400

epoch 8
**********
[8/20] Loss: 0.

## Save model

In [20]:
# Save the model: only the state dict (parameters and buffers) is written,
# not the module object itself.
checkpoint_path = './cnn.pth'
torch.save(model.state_dict(), checkpoint_path)