https://github.com/tensorflow/models/blob/master/official/resnet/cifar10_main.py

In [1]:
"""Runs a ResNet model on the CIFAR-10 dataset."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import os
import sys

import tensorflow as tf

_BATCH_SIZE = 128
_HEIGHT = 32
_WIDTH = 32
_DEPTH = 3
_IMAGE_SIZE_CROPPED = 24
_NUM_CLASSES = 10
_NUM_DATA_FILES = 5
_NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
_NUM_EXAMPLES_PER_EPOCH_FOR_EVAL = 10000
_NUM_IMAGES = {'train'     : _NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN,
               'validation': _NUM_EXAMPLES_PER_EPOCH_FOR_EVAL}

  return f(*args, **kwds)


In [2]:
def record_dataset(filenames):
    """Returns an input pipeline Dataset from `filenames`."""
    record_bytes = _HEIGHT * _WIDTH * _DEPTH + 1
    return tf.data.FixedLengthRecordDataset(filenames, record_bytes)


def get_filenames(is_training, data_dir):
    """Returns a list of filenames."""
    data_dir = os.path.join(data_dir, 'cifar-10-batches-bin')

    assert os.path.exists(data_dir), (
        'Run cifar10_download_and_extract.py first to download and extract the CIFAR-10 data.')

    if is_training:
        return [
            os.path.join(data_dir, 'data_batch_%d.bin' % i)
            for i in range(1, _NUM_DATA_FILES + 1)
        ]
    else:
        return [os.path.join(data_dir, 'test_batch.bin')]


def parse_record(raw_record):
    """Parse CIFAR-10 image and label from a raw record."""
    # Every record consists of a label followed by the image, with a fixed number of bytes for each.
    label_bytes = 1
    image_bytes = _HEIGHT * _WIDTH * _DEPTH
    record_bytes = label_bytes + image_bytes

    # Convert bytes to a vector of uint8 that is record_bytes long.
    record_vector = tf.decode_raw(raw_record, tf.uint8)

    # The first byte represents the label, which we convert from uint8 to int32 and then to one-hot.
    label = tf.cast(record_vector[0], tf.int32)
#    label = tf.one_hot(label, _NUM_CLASSES)

    # The remaining bytes after the label represent the image, which we reshape
    # from [depth * height * width] to [depth, height, width].
    depth_major = tf.reshape(
        record_vector[label_bytes:record_bytes], [_DEPTH, _HEIGHT, _WIDTH])

    # Convert from [depth, height, width] to [height, width, depth], and cast as float32.
    image = tf.cast(tf.transpose(depth_major, [1, 2, 0]), tf.float32)
    
    return image, label

def preprocess_image(image, is_training):
    """Preprocess a single image of layout [height, width, depth]."""
    if is_training:
        # Resize the image to add four extra pixels on each side.
        image = tf.image.resize_image_with_crop_or_pad(
            image, _HEIGHT + 8, _WIDTH + 8)

        # Randomly crop a [_HEIGHT, _WIDTH] section of the image.
        height = _IMAGE_SIZE_CROPPED
        width = _IMAGE_SIZE_CROPPED
        image = tf.random_crop(image, [height, width, _DEPTH])

        # Randomly flip the image horizontally.
        image = tf.image.random_flip_left_right(image)
    else:
        # image preprocessing like training
        height = _IMAGE_SIZE_CROPPED
        width = _IMAGE_SIZE_CROPPED
        image = tf.image.resize_image_with_crop_or_pad(image, height, width)

    # Subtract off the mean and divide by the variance of the pixels.
    image = tf.image.per_image_standardization(image)
    return image


def input_fn(is_training, data_dir, batch_size):
    """Input_fn using the tf.data input pipeline for CIFAR-10 dataset.
        Args:
            is_training: A boolean denoting whether the input is for training.
            data_dir: The directory containing the input data.
            batch_size: The number of samples per batch.
            num_epochs: The number of epochs to repeat the dataset.
        Returns:
            A tuple of images and labels.
    """
    dataset = record_dataset(get_filenames(is_training, data_dir))

    if is_training:
        # When choosing shuffle buffer sizes, larger sizes result in better randomness,
        # while smaller sizes have better performance. Because CIFAR-10 is a relatively small dataset,
        # we choose to shuffle the full epoch.
        dataset = dataset.shuffle(buffer_size=_NUM_IMAGES['train'])

    dataset = dataset.map(parse_record)
    dataset = dataset.map(lambda image, label: (preprocess_image(image, is_training), label))

    dataset = dataset.prefetch(2 * batch_size)

    # We call repeat after shuffling, rather than before, to prevent separate epochs from blending together.
    dataset = dataset.repeat()

    # Batch results by up to batch_size, and then fetch the tuple from the iterator.
    dataset = dataset.batch(batch_size)
    iterator = dataset.make_one_shot_iterator()

    return iterator

In [3]:
iterator = input_fn(is_training = True,
                    batch_size = _BATCH_SIZE,
                    data_dir = 'dataset/cifar10')

In [4]:
with tf.Session() as sess:
    next_batch = iterator.get_next()
    data, label = sess.run(next_batch)
    print(data.shape)
    print(label.shape)

(128, 24, 24, 3)
(128,)


In [5]:
from lab12_util import *
from tensorflow.contrib.data import FixedLengthRecordDataset, Iterator

tf.reset_default_graph()

# Generate labels_images_pairs
iterator_train = input_fn(
    is_training=True, batch_size=_BATCH_SIZE, data_dir='dataset/cifar10')
labels_images_pairs = iterator_train.get_next()

# CNN model
model = CNN_Model(
    batch_size = _BATCH_SIZE,
    num_classes = _NUM_CLASSES,
    num_training_example = _NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN,
    num_epoch_per_decay = 350.0,
    init_lr = 0.1,
    moving_average_decay = 0.9999)

with tf.device('/cpu:0'):
    images, labels = labels_images_pairs
    images = tf.reshape(images, [_BATCH_SIZE, 24, 24, _DEPTH])
    labels = tf.reshape(labels, [_BATCH_SIZE])

with tf.variable_scope('model'):
    logits = model.inference(images)

# train
global_step = tf.train.get_or_create_global_step()
total_loss = model.loss(logits, labels)
train_op = model.train(total_loss, global_step)

In [6]:
import time

_NUM_EPOCH = 10
_NUM_BATCH_PER_EPOCH = _NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN // _BATCH_SIZE
ckpt_dir = './model1/'

# train
saver = tf.train.Saver()
with tf.Session() as sess:
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    if (ckpt and ckpt.model_checkpoint_path):
        saver.restore(sess, ckpt.model_checkpoint_path)
        # assume the name of checkpoint is like '.../model.ckpt-1000'
        gs = int(ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1])
        sess.run(tf.assign(global_step, gs))
    else:
        # no checkpoint found
        sess.run(tf.global_variables_initializer())
    loss = []
    for i in range(_NUM_EPOCH):
        start = time.time()
        _loss = []
        for _ in range(_NUM_BATCH_PER_EPOCH):
            l, _ = sess.run([total_loss, train_op])
            _loss.append(l)

        end = time.time()
        loss_this_epoch = np.sum(_loss)
        gs = global_step.eval()
        print ("Epoch {}/{}".format(i, _NUM_EPOCH),
               ", Avg. Training loss: {:.4f}".format(loss_this_epoch),
               ", Spend {:.4f} sec".format(end-start))

        loss.append(loss_this_epoch)
        saver.save(sess, ckpt_dir + 'model.ckpt', global_step=gs)

print('Done')

Epoch 0/10 , Avg. Training loss: 1561.5322 , Spend 258.4264 sec
Epoch 1/10 , Avg. Training loss: 1255.9395 , Spend 261.4649 sec
Epoch 2/10 , Avg. Training loss: 1053.4154 , Spend 241.4004 sec
Epoch 3/10 , Avg. Training loss: 901.7992 , Spend 240.0456 sec
Epoch 4/10 , Avg. Training loss: 788.4717 , Spend 236.6721 sec
Epoch 5/10 , Avg. Training loss: 706.6742 , Spend 236.3431 sec
Epoch 6/10 , Avg. Training loss: 643.5648 , Spend 236.3495 sec
Epoch 7/10 , Avg. Training loss: 597.1235 , Spend 237.3216 sec
Epoch 8/10 , Avg. Training loss: 560.1920 , Spend 236.5734 sec
Epoch 9/10 , Avg. Training loss: 531.8333 , Spend 236.5790 sec
Done


In [7]:
iterator = input_fn(is_training = False,
                    batch_size = _BATCH_SIZE,
                    data_dir = 'dataset/cifar10')

with tf.Session() as sess:
    next_batch = iterator.get_next()
    data, label = sess.run(next_batch)
    print(data.shape)
    print(label.shape)

(128, 24, 24, 3)
(128,)


In [8]:
iterator_test = input_fn(
    is_training=False, batch_size=_BATCH_SIZE, data_dir='dataset/cifar10')

with tf.device('/cpu:0'):
    # build testing example queue
    images, labels = iterator_test.get_next()
    images = tf.reshape(images, [_BATCH_SIZE, 24, 24, _DEPTH])
    labels = tf.reshape(labels, [_BATCH_SIZE])

with tf.variable_scope('model', reuse=True):
    logits = model.inference(images)

# use to calculate top-1 error
top_k_op = tf.nn.in_top_k(logits, labels, 1)

In [9]:
num_iter = _NUM_EXAMPLES_PER_EPOCH_FOR_EVAL // _BATCH_SIZE
total_sample_count = num_iter * _BATCH_SIZE

variables_to_restore = model.ema.variables_to_restore()
saver = tf.train.Saver(variables_to_restore)

with tf.Session() as sess:
    # Restore variables from disk.
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)
    if ckpt and ckpt.model_checkpoint_path:
        saver.restore(sess, ckpt.model_checkpoint_path)

        start = time.time()
        true_count = 0
        for _ in range(num_iter):
            predictions = sess.run(top_k_op)
            true_count += np.sum(predictions)
            if _ % 10 == 0:
                print ('Iteration: %d/%d ' % (_, num_iter))
        end = time.time()
        print('Accurarcy: %d/%d = %f' % (true_count, total_sample_count,
                                         true_count / total_sample_count),
              ", Spend {:.4f} sec".format(end-start))
    else:
        print('train first')

INFO:tensorflow:Restoring parameters from ./model1/model.ckpt-3900
Iteration: 0/78 
Iteration: 10/78 
Iteration: 20/78 
Iteration: 30/78 
Iteration: 40/78 
Iteration: 50/78 
Iteration: 60/78 
Iteration: 70/78 
Accurarcy: 6892/9984 = 0.690304 , Spend 13.9689 sec
