In [1]:
import matplotlib.pyplot as plt
import matplotlib.cm as cm
%matplotlib inline

import time

import pandas as pd
import seaborn as sns
import numpy as np
from tqdm import tqdm

import tensorflow as tf
import tensorflow.contrib.slim as slim

from tensorflow.examples.tutorials.mnist import input_data
mnist = input_data.read_data_sets("MNIST_data/", reshape=False)

Extracting MNIST_data/train-images-idx3-ubyte.gz
Extracting MNIST_data/train-labels-idx1-ubyte.gz
Extracting MNIST_data/t10k-images-idx3-ubyte.gz
Extracting MNIST_data/t10k-labels-idx1-ubyte.gz


In [2]:
img_size = (28, 28)
n_classes = 10

imgs_train = mnist.train.images.reshape((-1, 1, *img_size)) / 255
imgs_val = mnist.validation.images.reshape((-1, 1, *img_size)) / 255
imgs_test = mnist.test.images.reshape((-1, 1, *img_size)) / 255

y_train = mnist.train.labels.astype(np.int32)
y_val = mnist.validation.labels.astype(np.int32)
y_test = mnist.test.labels.astype(np.int32)

def make_batch_iter(x, y, batch_size, shuffle=False):
    n = len(x)
    idx = np.arange(n)
    if shuffle:
        np.random.shuffle(idx)
    
    for i in range(0, n, batch_size):
        x_batch = x[i:i+batch_size]
        y_batch = y[i:i+batch_size]
        yield np.array(x_batch, np.float32), np.array(y_batch)

In [3]:
def build_fe(l, n_layers, kernel_size, n_chans_base, n_chans_mul):
    for i in range(n_layers):
        n_chans = n_chans_base * n_chans_mul ** i
        l = slim.conv2d(l, n_chans, kernel_size, padding='valid',
                        data_format='NCHW', activation_fn=tf.nn.relu,
                        normalizer_fn=slim.batch_norm)
        l = slim.max_pool2d(l, 2, 2, data_format='NCHW')
    l = tf.reduce_mean(l, axis=(2, 3), name='global_average_pooling')
    return l

def build_clf(l):
    n_layers = 2
    units = 1024
    for i in range(n_layers):
        l = slim.relu(l, units, normalizer_fn=slim.batch_norm)
    l = slim.fully_connected(l, n_classes, normalizer_fn=slim.batch_norm)
    return l

class Model:
    def __init__(self, n_layers, n_chans_base, n_chans_mul, kernel_size):
        self.x_ph = tf.placeholder(tf.float32, (None, 1, None, None))
        self.training = tf.placeholder(tf.bool)
        self.y_ph = tf.placeholder(tf.int64, (None,))
        
        with slim.arg_scope([slim.batch_norm], is_training=self.training,
                            decay=0.9, data_format='NCHW', fused=True):
            self.fe = build_fe(self.x_ph, n_layers, kernel_size,
                               n_chans_base, n_chans_mul)
            self.logits = build_clf(self.fe)
        
        self.loss = tf.reduce_mean(tf.losses.sparse_softmax_cross_entropy(self.y_ph, self.logits))
        self.acc = tf.contrib.metrics.accuracy(tf.argmax(self.logits, 1), self.y_ph)
        
        self.train_op = slim.learning.create_train_op(self.loss, tf.train.AdamOptimizer())

n_layers = 3
kernel_size = 3
n_chans_base = 16
n_chans_mul = 2

tf.reset_default_graph()
graph = tf.get_default_graph()
with graph.as_default():
    model = Model(n_layers, n_chans_base, n_chans_mul, kernel_size)

In [4]:
n_epoch = 100
batch_size = 1024

with tf.Session(graph=graph) as session:
    session.run(tf.global_variables_initializer())
    for epoch in range(n_epoch):
        train_iter = make_batch_iter(imgs_train, y_train, batch_size=batch_size)
        val_iter = make_batch_iter(imgs_val, y_val, batch_size=batch_size)

        start = time.time()

        losses = []
        weights = []
        accs = []
        for x_batch, y_batch in train_iter:
            feed_dict = {model.x_ph: x_batch, model.y_ph: y_batch, model.training: True}
            _, loss, acc = session.run([model.train_op, model.loss, model.acc], feed_dict)

            accs.append(acc)
            losses.append(loss)
            weights.append(len(x_batch))

        train_loss = np.average(np.array(losses).flatten(), weights=weights)
        train_acc = np.average(np.array(accs).flatten(), weights=weights)

        losses = []
        weights = []
        accs = []
        for x_batch, y_batch in val_iter:
            feed_dict = {model.x_ph: x_batch, model.y_ph: y_batch, model.training: False}
            loss, acc = session.run([model.loss, model.acc], feed_dict)

            accs.append(acc)
            losses.append(loss)
            weights.append(len(x_batch))

        end = time.time()

        val_loss = np.average(np.array(losses).flatten(), weights=weights)
        val_acc = np.average(np.array(accs).flatten(), weights=weights)

        print('Epoch {}'.format(epoch))
        print('Train:', train_loss, train_acc)
        print('Val  :', val_loss, val_acc)
        print('Time :', end - start)
        print('\n')

Epoch 0
Train: 0.689129796488 0.909418182442
Val  : 2.6344859848 0.112600002193
Time : 3.0514960289001465


Epoch 1
Train: 0.472594303083 0.971309091819
Val  : 0.484376084185 0.973799996567
Time : 1.4473044872283936


Epoch 2
Train: 0.421500842411 0.980036364044
Val  : 0.439248408604 0.980199998665
Time : 1.450057029724121


Epoch 3
Train: 0.387938246935 0.98489090851
Val  : 0.392955954552 0.98499999218
Time : 1.4433908462524414


Epoch 4
Train: 0.362638219842 0.988727272736
Val  : 0.356715663624 0.986199982834
Time : 1.4462671279907227


Epoch 5
Train: 0.343041778538 0.991000000598
Val  : 0.332703485203 0.987599977779
Time : 1.4351048469543457


Epoch 6
Train: 0.328456886565 0.992563636962
Val  : 0.323737135696 0.987999977779
Time : 1.4410545825958252


Epoch 7
Train: 0.317705158958 0.993945455438
Val  : 0.320323725128 0.987399977779
Time : 1.4476897716522217


Epoch 8
Train: 0.310533735709 0.995127274409
Val  : 0.319051664591 0.98819998064
Time : 1.4474480152130127


Epoch 9
Train: 0

KeyboardInterrupt: 