In [1]:
import torch
import matplotlib.pyplot as plt
import torch.nn.functional as F
from torch import nn, optim

from torchvision import datasets, transforms

# Preprocessing pipeline: turn each image into a tensor, then standardise it
# with the single-channel mean/std pair (0.1307, 0.3081).
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# MNIST train and test splits stored under the relative 'data' directory;
# download=True fetches the files when they are not already present.
trainset = datasets.MNIST(root='data', train=True, transform=transform, download=True)
testset = datasets.MNIST(root='data', train=False, transform=transform, download=True)

class LeNet(nn.Module):
    """Small convolutional classifier: two conv/ReLU/max-pool stages, then three linear layers.

    ``fc1`` expects 16*4*4 features per sample, which is what a 1x28x28 input
    yields after two unpadded 5x5 convolutions each followed by 2x2 pooling
    (28 -> 24 -> 12 -> 8 -> 4). The last layer produces 10 scores per sample.
    """

    def __init__(self):
        super().__init__()
        # Convolutional stages: 1 -> 6 -> 16 channels, 5x5 kernels.
        self.c1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.c3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Fully connected head: 256 -> 120 -> 84 -> 10.
        self.fc1 = nn.Linear(in_features=16 * 4 * 4, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Return the raw (un-softmaxed) output of ``fc3`` for the batch ``x``."""
        # Each convolutional stage is conv -> ReLU -> 2x2 max-pool.
        for conv in (self.c1, self.c3):
            x = F.max_pool2d(F.relu(conv(x)), 2)
        # Collapse everything except the leading dimension into one axis.
        x = x.view(-1, self.num_flat_features(x))
        # Hidden linear layers use ReLU; the output layer is left linear.
        for hidden in (self.fc1, self.fc2):
            x = F.relu(hidden(x))
        return self.fc3(x)

    def num_flat_features(self, x):
        """Return the product of every dimension of ``x`` except the first."""
        return x.shape[1:].numel()

# Run on the GPU whenever one is visible to PyTorch.
CUDA = torch.cuda.is_available()
lenet = LeNet()
if CUDA:
    lenet = lenet.cuda()

# Cross-entropy loss optimised with plain SGD (momentum explicitly disabled).
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(params=lenet.parameters(), lr=0.001, momentum=0)

# Reshuffled mini-batches of 4 training samples.
trainloader = torch.utils.data.DataLoader(dataset=trainset, batch_size=4, shuffle=True)

def _model_device(model):
    """Return the device of ``model``'s first parameter, or None if it has none."""
    parameters = getattr(model, 'parameters', None)
    if not callable(parameters):
        return None
    first = next(iter(parameters()), None)
    return None if first is None else first.device


def train(model, criterion, optimizer, epochs=1, loader=None, log_every=1000):
    """Run a basic supervised training loop and print the running mean loss.

    Args:
        model: Callable mapping a batch of inputs to outputs.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        optimizer: Optimizer providing ``zero_grad()`` and ``step()``.
        epochs: Number of full passes over ``loader``.
        loader: Iterable yielding ``(inputs, labels)`` batches. Defaults to
            the module-level ``trainloader``.
        log_every: Print the mean loss after every ``log_every`` batches; the
            accumulator is reset after each print and at the start of every
            epoch, so a trailing partial window is not reported.

    Raises:
        ValueError: If ``log_every`` is smaller than 1.
    """
    if log_every < 1:
        raise ValueError('log_every must be a positive integer')
    if loader is None:
        loader = trainloader

    # Move batches to wherever the *given* model lives instead of trusting a
    # global CUDA flag, so a CPU model still trains on a CUDA-capable machine.
    device = _model_device(model)

    for epoch in range(epochs):
        running_loss = 0.0
        for i, (inputs, labels) in enumerate(loader):
            if device is not None:
                inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            loss = criterion(model(inputs), labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            if (i + 1) % log_every == 0:
                print('[Epoch:%d, Batch:%5d] Loss: %.3f' % (epoch+1, i+1, running_loss / log_every))
                running_loss = 0.0
    print('Finish training')

# Train the network for two passes over the training data.
train(model=lenet, criterion=criterion, optimizer=optimizer, epochs=2)

[Epoch:1, Batch: 1000] Loss: 2.296
[Epoch:1, Batch: 2000] Loss: 2.268
[Epoch:1, Batch: 3000] Loss: 2.176
[Epoch:1, Batch: 4000] Loss: 1.565
[Epoch:1, Batch: 5000] Loss: 0.696
[Epoch:1, Batch: 6000] Loss: 0.518
[Epoch:1, Batch: 7000] Loss: 0.455
[Epoch:1, Batch: 8000] Loss: 0.398
[Epoch:1, Batch: 9000] Loss: 0.366
[Epoch:1, Batch:10000] Loss: 0.338
[Epoch:1, Batch:11000] Loss: 0.303
[Epoch:1, Batch:12000] Loss: 0.270
[Epoch:1, Batch:13000] Loss: 0.249
[Epoch:1, Batch:14000] Loss: 0.232
[Epoch:1, Batch:15000] Loss: 0.223
[Epoch:2, Batch: 1000] Loss: 0.222
[Epoch:2, Batch: 2000] Loss: 0.190
[Epoch:2, Batch: 3000] Loss: 0.192
[Epoch:2, Batch: 4000] Loss: 0.175
[Epoch:2, Batch: 5000] Loss: 0.153
[Epoch:2, Batch: 6000] Loss: 0.153
[Epoch:2, Batch: 7000] Loss: 0.146
[Epoch:2, Batch: 8000] Loss: 0.156
[Epoch:2, Batch: 9000] Loss: 0.138
[Epoch:2, Batch:10000] Loss: 0.139
[Epoch:2, Batch:11000] Loss: 0.144
[Epoch:2, Batch:12000] Loss: 0.134
[Epoch:2, Batch:13000] Loss: 0.124
[Epoch:2, Batch:1400