In [1]:
import gzip
import os
import urllib.request as request
from os import path

import numpy as np

# Root directory (a relative path) under which dataset folders are kept;
# load_mnist() joins it with 'mnist/'.
DATASET_DIR = 'datasets/'

# File names of the four gzip-compressed MNIST archives: training images,
# training labels, test ("t10k") images and test labels.
MNIST_FILES = ["train-images-idx3-ubyte.gz", "train-labels-idx1-ubyte.gz",
               "t10k-images-idx3-ubyte.gz", "t10k-labels-idx1-ubyte.gz"]

In [2]:
def download_file(url, local_path):
    """Download ``url`` and store the result at ``local_path``.

    The parent directory of ``local_path`` is created first when it does not
    exist yet. Progress messages are printed to stdout.

    Args:
        url: Address of the resource to fetch.
        local_path: Destination file path. A bare file name (no directory
            component) is written relative to the current working directory.

    Raises:
        OSError: If the directory cannot be created, the file cannot be
            written, or the retrieval fails.
    """
    dir_path = path.dirname(local_path)
    # dirname() is '' for a bare file name; os.makedirs('') would raise and
    # there is nothing to create in that case anyway.
    if dir_path and not path.exists(dir_path):
        print("Creating the directory '%s' ..." % dir_path)
        # exist_ok tolerates the directory appearing between the check above
        # and this call.
        os.makedirs(dir_path, exist_ok=True)

    print("Downloading from '%s' ..." % url)
    request.urlretrieve(url, local_path)

In [3]:
def download_mnist(local_path):
    """Fetch every MNIST archive that is not yet present in ``local_path``.

    Each name in ``MNIST_FILES`` is looked up inside ``local_path``; files
    that already exist are left untouched, the others are retrieved with
    ``download_file`` from the fixed root URL below.

    Args:
        local_path: Directory in which the archives are stored.
    """
    base_url = "http://yann.lecun.com/exdb/mnist/"
    targets = ((name, os.path.join(local_path, name)) for name in MNIST_FILES)
    for archive_name, destination in targets:
        if path.exists(destination):
            continue
        download_file(base_url + archive_name, destination)

In [4]:
def one_hot(x, n):
    """Encode integer class labels as one-hot rows.

    Args:
        x: Integer class indices. Any array-like is accepted (list, tuple,
            scalar or ndarray of any shape); it is flattened before encoding.
        n: Number of classes, i.e. the width of each encoded row.

    Returns:
        A float array of shape ``(len(labels), n)`` holding a single 1 per
        row at the column given by the corresponding label. Empty input
        yields an array of shape ``(0, n)``.

    Raises:
        IndexError: If a label is out of range for ``n`` columns or the
            labels are not of an integer type.
    """
    # asarray handles lists, tuples and scalars alike and avoids copying an
    # input that already is an ndarray; flatten() then makes it 1-D.
    labels = np.asarray(x).flatten()
    count = labels.shape[0]
    encoded = np.zeros((count, n))
    # An empty Python sequence converts to a float array, which NumPy rejects
    # as an index; with no labels there is nothing to set anyway.
    if count:
        encoded[np.arange(count), labels] = 1
    return encoded

In [5]:
def _load_idx_images(file_path, count):
    """Read ``count`` 28x28 images from a gzipped IDX file as float rows in [0, 1]."""
    with gzip.open(file_path) as fd:
        raw = np.frombuffer(fd.read(), dtype=np.uint8)
    # Skip the 16-byte header; reshape fails loudly if the payload does not
    # hold exactly ``count`` images. astype() copies, so the result is writable.
    return raw[16:].reshape((count, 28 * 28)).astype(float) / 255.


def _load_idx_labels(file_path, count):
    """Read ``count`` labels from a gzipped IDX file as a writable uint8 vector."""
    with gzip.open(file_path) as fd:
        raw = np.frombuffer(fd.read(), dtype=np.uint8)
    # np.frombuffer over bytes yields a read-only view; copy so callers get an
    # array they can modify or hand to torch.from_numpy without a warning.
    return raw[8:].reshape(count).copy()


def load_mnist(ntrain=60000, ntest=10000, onehot=True):
    """Load MNIST, downloading any archive that is missing.

    The archives live in ``DATASET_DIR + 'mnist/'``. Pixel values are scaled
    from 0..255 to 0..1.

    Args:
        ntrain: Number of leading training examples to keep (at most 60000).
        ntest: Number of leading test examples to keep (at most 10000).
        onehot: If true, labels are returned one-hot encoded with 10 columns;
            otherwise as a vector of uint8 class indices.

    Returns:
        Tuple ``(trX, teX, trY, teY)``: training images, test images,
        training labels, test labels. Images are float arrays with one
        flattened 784-pixel image per row.

    Raises:
        ValueError: If an archive does not contain the expected number of
            items (the reshape fails).
    """
    data_dir = os.path.join(DATASET_DIR, 'mnist/')
    # download_mnist() skips files that already exist, so it doubles as the
    # "are all archives present?" check.
    download_mnist(data_dir)

    trX = _load_idx_images(
        os.path.join(data_dir, 'train-images-idx3-ubyte.gz'), 60000)
    trY = _load_idx_labels(
        os.path.join(data_dir, 'train-labels-idx1-ubyte.gz'), 60000)
    teX = _load_idx_images(
        os.path.join(data_dir, 't10k-images-idx3-ubyte.gz'), 10000)
    teY = _load_idx_labels(
        os.path.join(data_dir, 't10k-labels-idx1-ubyte.gz'), 10000)

    trX, trY = trX[:ntrain], trY[:ntrain]
    teX, teY = teX[:ntest], teY[:ntest]

    if onehot:
        trY = one_hot(trY, 10)
        teY = one_hot(teY, 10)
    else:
        trY = np.asarray(trY)
        teY = np.asarray(teY)

    return trX, teX, trY, teY

In [6]:
def build_model(input_dim, output_dim):
    """Create a classifier made of one bias-free linear layer named "linear".

    NOTE(review): the following cell defines ``build_model`` again with the
    same layers; when the cells run in order, that later definition is the
    one in effect.

    Args:
        input_dim: Number of input features.
        output_dim: Number of output scores (classes).

    Returns:
        A ``torch.nn.Sequential`` containing the single linear layer.
    """
    linear = torch.nn.Linear(input_dim, output_dim, bias=False)
    net = torch.nn.Sequential()
    net.add_module("linear", linear)
    return net

In [7]:
def build_model(input_dim, output_dim):
    """Build the classifier: one bias-free linear layer producing raw scores.

    No softmax layer is appended because CrossEntropyLoss already applies it
    internally.

    Args:
        input_dim: Number of input features.
        output_dim: Number of output scores (classes).

    Returns:
        A ``torch.nn.Sequential`` whose only child is registered as "linear".
    """
    layers = [
        ("linear", torch.nn.Linear(input_dim, output_dim, bias=False)),
    ]
    model = torch.nn.Sequential()
    for layer_name, layer in layers:
        model.add_module(layer_name, layer)
    return model

In [8]:
def train(model, loss, optimizer, x_val, y_val):
    """Run one optimisation step on a single mini-batch.

    Args:
        model: ``torch.nn.Module`` mapping the inputs to predictions.
        loss: Loss module or plain callable taking ``(prediction, target)``
            and returning a scalar tensor.
        optimizer: Optimizer holding the parameters of ``model``.
        x_val: Input tensor for this batch.
        y_val: Target tensor for this batch.

    Returns:
        The loss of this batch as a Python float, computed before the
        parameter update.
    """
    model.train()
    # Inputs and targets are data, not parameters: detach them so no gradient
    # is tracked for them. This mirrors the deprecated
    # Variable(..., requires_grad=False) wrapping without needing Variable.
    x = x_val.detach()
    y = y_val.detach()

    # Reset gradient
    optimizer.zero_grad()

    # Forward -- call the modules instead of .forward() so registered hooks
    # run and plain callables are accepted as loss.
    fx = model(x)
    output = loss(fx, y)

    # Backward
    output.backward()

    # Update parameters
    optimizer.step()

    return output.item()

In [9]:

def predict(model, x_val):
    """Return the predicted class index for every row of ``x_val``.

    The model is switched to evaluation mode and run without gradient
    tracking.

    Args:
        model: ``torch.nn.Module`` producing one score per class along axis 1.
        x_val: Input tensor with one example per row.

    Returns:
        A 1-D NumPy array with the index of the highest score in each row.
    """
    model.eval()
    # Inference only: no_grad avoids building an autograd graph; calling the
    # module (not .forward()) keeps registered hooks working.
    with torch.no_grad():
        output = model(x_val)
    # .cpu() is a no-op for CPU tensors and makes .numpy() valid otherwise.
    return output.cpu().numpy().argmax(axis=1)

In [10]:
def main(n_epochs=100, batch_size=100, lr=0.01, momentum=0.9):
    """Train a linear softmax classifier on MNIST and print per-epoch results.

    After every epoch the mean training loss over the batches and the
    accuracy on the test set are printed.

    Args:
        n_epochs: Number of passes over the training set.
        batch_size: Number of examples per SGD step. Trailing examples that
            do not fill a complete batch are not used.
        lr: Learning rate for SGD.
        momentum: Momentum factor for SGD.

    Raises:
        ValueError: If ``batch_size`` is not between 1 and the number of
            training examples.
    """
    torch.manual_seed(42)
    trX, teX, trY, teY = load_mnist(onehot=False)
    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()

    n_examples, n_features = trX.size()
    if not 0 < batch_size <= n_examples:
        # Otherwise num_batches would be 0 and the mean cost undefined.
        raise ValueError("batch_size must be between 1 and %d, got %r"
                         % (n_examples, batch_size))

    n_classes = 10
    model = build_model(n_features, n_classes)
    # 'elementwise_mean' is a deprecated alias of 'mean' (same reduction).
    loss = torch.nn.CrossEntropyLoss(reduction='mean')
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)
    # Loop-invariant, so computed once instead of every epoch.
    num_batches = n_examples // batch_size

    for i in range(n_epochs):
        cost = 0.
        for k in range(num_batches):
            start, end = k * batch_size, (k + 1) * batch_size
            cost += train(model, loss, optimizer,
                          trX[start:end], trY[start:end])
        predY = predict(model, teX)
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (i + 1, cost / num_batches, 100. * np.mean(predY == teY)))

In [11]:
import numpy as np

import torch
from torch.autograd import Variable
from torch import optim
# Start training only when this code is executed directly (script or
# notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    main()

Creating the directory 'datasets/mnist' ...
Downloading from 'http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz' ...
Downloading from 'http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz' ...
Downloading from 'http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz' ...
Downloading from 'http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz' ...


  


Epoch 1, cost = 0.547787, acc = 90.15%
Epoch 2, cost = 0.365290, acc = 90.95%
Epoch 3, cost = 0.338373, acc = 91.26%
Epoch 4, cost = 0.324327, acc = 91.48%
Epoch 5, cost = 0.315265, acc = 91.73%
Epoch 6, cost = 0.308756, acc = 91.78%
Epoch 7, cost = 0.303762, acc = 91.91%
Epoch 8, cost = 0.299754, acc = 91.98%
Epoch 9, cost = 0.296430, acc = 92.00%
Epoch 10, cost = 0.293606, acc = 92.02%
Epoch 11, cost = 0.291160, acc = 91.99%
Epoch 12, cost = 0.289010, acc = 92.05%
Epoch 13, cost = 0.287096, acc = 92.11%
Epoch 14, cost = 0.285375, acc = 92.16%
Epoch 15, cost = 0.283815, acc = 92.16%
Epoch 16, cost = 0.282389, acc = 92.22%
Epoch 17, cost = 0.281080, acc = 92.21%
Epoch 18, cost = 0.279869, acc = 92.24%
Epoch 19, cost = 0.278746, acc = 92.24%
Epoch 20, cost = 0.277698, acc = 92.26%
Epoch 21, cost = 0.276718, acc = 92.23%
Epoch 22, cost = 0.275797, acc = 92.25%
Epoch 23, cost = 0.274930, acc = 92.23%
Epoch 24, cost = 0.274111, acc = 92.25%
Epoch 25, cost = 0.273336, acc = 92.28%
Epoch 26,