In [4]:
import numpy as np

import torch
from torch.autograd import Variable
from torch import optim

from data_util import load_mnist

In [8]:
def build_model(input_dim, output_dim):
    """Create a bias-free linear classifier wrapped in a ``Sequential``.

    The network emits raw class scores (logits). No softmax layer is added
    because ``torch.nn.CrossEntropyLoss`` applies log-softmax internally.

    Args:
        input_dim: Number of input features per example.
        output_dim: Number of output scores (one per class).

    Returns:
        A ``torch.nn.Sequential`` holding a single ``torch.nn.Linear`` layer
        (without bias) registered under the name ``"linear"``.
    """
    linear_layer = torch.nn.Linear(input_dim, output_dim, bias=False)
    net = torch.nn.Sequential()
    net.add_module("linear", linear_layer)
    return net


def train(model, loss, optimizer, x_val, y_val):
    """Run one optimisation step on a single mini-batch.

    Args:
        model: Module that maps ``x_val`` to per-class scores.
        loss: Criterion invoked as ``loss(scores, y_val)``; it must return a
            scalar tensor so that ``backward()`` and ``item()`` are valid.
        optimizer: Optimizer whose ``zero_grad()`` and ``step()`` are called
            around the backward pass.
        x_val: Input batch tensor.
        y_val: Target tensor for the batch.

    Returns:
        The loss of this batch as a plain Python float.
    """
    # Clear gradients left over from the previous step.
    optimizer.zero_grad()

    # Forward pass. Calling the modules (instead of ``.forward``) makes sure
    # any registered hooks are executed. Tensors are used directly; the
    # ``Variable`` wrapper has been a no-op since PyTorch 0.4.
    fx = model(x_val)
    output = loss(fx, y_val)

    # Backward pass: populate ``.grad`` on the parameters.
    output.backward()

    # Apply the parameter update.
    optimizer.step()

    # The loss is a 0-dim tensor; indexing it with ``[0]`` raises on
    # PyTorch >= 0.4, so extract the scalar with ``item()``.
    return output.item()


def predict(model, x_val):
    """Return the predicted class index for every row of ``x_val``.

    Args:
        model: Module that maps ``x_val`` to a 2-D tensor of per-class
            scores (one row per example).
        x_val: Input tensor.

    Returns:
        A NumPy integer array with the arg-max column index of each row of
        the model output.
    """
    # Inference only: skip autograd bookkeeping so no graph is built for the
    # (potentially large) evaluation set.
    with torch.no_grad():
        # Call the module rather than ``.forward`` so registered hooks run.
        scores = model(x_val)
    # ``.cpu()`` is a no-op for CPU tensors and keeps the NumPy conversion
    # valid if the model lives on another device.
    return scores.cpu().numpy().argmax(axis=1)


def main():
    """Train the linear classifier on MNIST and print per-epoch statistics.

    Loads the data with ``load_mnist(onehot=False)``, converts the training
    and test inputs to float tensors and the training labels to long
    tensors, then runs 100 epochs of mini-batch SGD (batch size 100,
    lr 0.01, momentum 0.9). After every epoch it prints the mean training
    cost over the batches and the accuracy of ``predict`` on the test inputs.

    Raises:
        ValueError: If the training set holds fewer examples than one batch.
    """
    torch.manual_seed(42)
    # NOTE(review): the return order (train X, test X, train y, test y) is
    # inferred from the variable names — confirm against data_util.
    trX, teX, trY, teY = load_mnist(onehot=False)
    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    n_examples, n_features = trX.size()
    n_classes = 10
    model = build_model(n_features, n_classes)
    # The default reduction already averages over the batch, which is what
    # the deprecated ``size_average=True`` argument used to request.
    loss = torch.nn.CrossEntropyLoss()
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    batch_size = 100
    n_epochs = 100

    # Loop-invariant, so computed once. Integer division: a trailing partial
    # batch is skipped, as before.
    num_batches = n_examples // batch_size
    if num_batches == 0:
        raise ValueError(
            "need at least %d training examples, got %d" % (batch_size, n_examples))

    for i in range(n_epochs):
        cost = 0.
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer, trX[start:end], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))


In [9]:
# Run the full training loop; one cost/accuracy line is printed per epoch.
main()

Epoch 1, cost = 0.548300, acc = 90.01%
Epoch 2, cost = 0.364975, acc = 90.91%
Epoch 3, cost = 0.338107, acc = 91.34%
Epoch 4, cost = 0.324099, acc = 91.48%
Epoch 5, cost = 0.315061, acc = 91.71%
Epoch 6, cost = 0.308569, acc = 91.80%
Epoch 7, cost = 0.303588, acc = 91.86%
Epoch 8, cost = 0.299590, acc = 91.92%
Epoch 9, cost = 0.296276, acc = 91.95%
Epoch 10, cost = 0.293460, acc = 92.01%
Epoch 11, cost = 0.291021, acc = 92.03%
Epoch 12, cost = 0.288878, acc = 92.01%
Epoch 13, cost = 0.286971, acc = 92.05%
Epoch 14, cost = 0.285256, acc = 92.09%
Epoch 15, cost = 0.283702, acc = 92.14%
Epoch 16, cost = 0.282282, acc = 92.18%
Epoch 17, cost = 0.280978, acc = 92.19%
Epoch 18, cost = 0.279773, acc = 92.20%
Epoch 19, cost = 0.278654, acc = 92.22%
Epoch 20, cost = 0.277611, acc = 92.23%
Epoch 21, cost = 0.276635, acc = 92.24%
Epoch 22, cost = 0.275718, acc = 92.26%
Epoch 23, cost = 0.274855, acc = 92.25%
Epoch 24, cost = 0.274040, acc = 92.29%
Epoch 25, cost = 0.273268, acc = 92.30%
Epoch 26,