In [1]:
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F
from torchvision import datasets, transforms

### 导入数据

In [2]:
transform = transforms.Compose([transforms.ToTensor(),
                               transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
train_data = datasets.MNIST('../MNIST_data', download=False, train=True, transform=transform)
train_data_loader = torch.utils.data.DataLoader(train_data, batch_size=64, shuffle=True)

test_data = datasets.MNIST('../MNIST_data', download=False, train=False, transform=transform)
test_data_loader = torch.utils.data.DataLoader(test_data, batch_size=64, shuffle=True)

### 创建类

In [3]:
class Network(nn.Module):
    def __init__(self, input_size, hidden_sizes, output_size, drop_p = 0.5):
        super().__init__()
        
        self.hidden_layers = nn.ModuleList([nn.Linear(input_size, hidden_sizes[0])])
        
        hidden_layers = zip(hidden_sizes[:-1], hidden_sizes[1:])
        self.hidden_layers.extend([nn.Linear(h1, h2) for h1, h2 in hidden_layers])
        
        self.output = nn.Linear(hidden_sizes[-1], output_size)
        self.dropout = nn.Dropout(p=drop_p)
    
    def forward(self, x):
        for linear in self.hidden_layers:
            x = F.relu(linear(x))
            x = self.dropout(x)
        x = self.output(x)
        return F.log_softmax(x, dim=1)

### 定义验证函数

In [4]:
def validation(model, test_data_loader, criterion):
    test_loss = 0
    accuracy = 0
    for images, labels in iter(test_data_loader):
        images.resize_(images.shape[0], 28 * 28)
        with torch.no_grad():
            output = model.forward(images)
            loss = criterion(output, labels)
            test_loss += loss.item()
            
            prediction = torch.exp(output)
            equation = prediction.max(dim = 1)[1] == labels.data
            accuracy += equation.type(torch.FloatTensor).mean()
            
    return loss / len(test_data_loader), accuracy / len(test_data_loader)

### 定义criterion optimizer，创建model

In [5]:
model = Network(28 * 28, [128, 64, 32], 10, drop_p=0.5)
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

### 训练

In [6]:
epochs = 3
curr_loss = 0
step = 0
print_every = 40

model.train()
for epoch in range(epochs):
    for images, labels in iter(train_data_loader):
        step += 1
        images.resize_(images.size()[0], 28 * 28)
        optimizer.zero_grad()
        
        output = model.forward(images)
        loss = criterion(output, labels)
        curr_loss += loss.item()
        loss.backward()
        optimizer.step()
        
        if step % print_every == 0:
            test_loss, test_accuracy = validation(model, test_data_loader, criterion)
            print('{} / {} epoch'.format(epoch + 1, epochs))
            print('current loss is {:.4f}'.format(curr_loss / print_every))
            print('test loss: {:.4f}'.format(test_loss))
            print('test accuracy: {:.4f}'.format(test_accuracy))
            model.train()
            curr_loss = 0

1 / 3 epoch
current loss is 2.2962
test loss: 0.0140
test accuracy: 0.1719
1 / 3 epoch
current loss is 2.1869
test loss: 0.0123
test accuracy: 0.2515
1 / 3 epoch
current loss is 1.9431
test loss: 0.0126
test accuracy: 0.3494
1 / 3 epoch
current loss is 1.6822
test loss: 0.0100
test accuracy: 0.4324
1 / 3 epoch
current loss is 1.5351
test loss: 0.0099
test accuracy: 0.4824
1 / 3 epoch
current loss is 1.3679
test loss: 0.0078
test accuracy: 0.5251
1 / 3 epoch
current loss is 1.3102
test loss: 0.0087
test accuracy: 0.5570
1 / 3 epoch
current loss is 1.2306
test loss: 0.0064
test accuracy: 0.5895
1 / 3 epoch
current loss is 1.1638
test loss: 0.0101
test accuracy: 0.6126
1 / 3 epoch
current loss is 1.1110
test loss: 0.0069
test accuracy: 0.6406


KeyboardInterrupt: 