In [1]:
import os
import time
import pickle

import numpy
import numpy as np
import tensorflow as tf
tf.enable_eager_execution()

In [2]:
os.environ["TF_CPP_MIN_LOG_LEVEL"] = '2'
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = "1"

In [3]:
file_name = "/home/xyang2/project/data/dataset/mnist/seed1/labeled_train.tfrecords"

In [4]:
import argparse
import os
import sys
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

def _data_path(data_directory:str, name:str) -> str:
    """Construct a full path to a TFRecord file to be stored in the 
    data_directory. Will also ensure the data directory exists
    
    Args:
        data_directory: The directory where the records will be stored
        name:           The name of the TFRecord
    
    Returns:
        The full path to the TFRecord file
    """
    if not os.path.isdir(data_directory):
        os.makedirs(data_directory)

    return os.path.join(data_directory, f'{name}.tfrecords')

def _int64_feature(value:int) -> tf.train.Features.FeatureEntry:
    """Create a Int64List Feature
    
    Args:
        value: The value to store in the feature
    
    Returns:
        The FeatureEntry
    """
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def _bytes_feature(value:str) -> tf.train.Features.FeatureEntry:
    """Create a BytesList Feature
    
    Args:
        value: The value to store in the feature
    
    Returns:
        The FeatureEntry
    """
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def convert_to(data_set, name:str, data_directory:str, num_shards:int=1):
    """Convert the dataset into TFRecords on disk
    
    Args:
        data_set:       The MNIST data set to convert
        name:           The name of the data set
        data_directory: The directory where records will be stored
        num_shards:     The number of files on disk to separate records into
    """
    print(f'Processing {name} data')

    images = data_set.images
    labels = data_set.labels
    
    num_examples, rows, cols, depth = data_set.images.shape

    def _process_examples(start_idx:int, end_index:int, filename:str):
        with tf.python_io.TFRecordWriter(filename) as writer:
            for index in range(start_idx, end_index):
                sys.stdout.write(f"\rProcessing sample {index+1} of {num_examples}")
                sys.stdout.flush()

                image_raw = images[index].tostring()
                example = tf.train.Example(features=tf.train.Features(feature={
                    'height': _int64_feature(rows),
                    'width': _int64_feature(cols),
                    'depth': _int64_feature(depth),
                    'label': _int64_feature(int(labels[index])),
                    'image': _bytes_feature(image_raw)
                }))
                writer.write(example.SerializeToString())
    
    if num_shards == 1:
        _process_examples(0, data_set.num_examples, _data_path(data_directory, name))
    else:
        total_examples = data_set.num_examples
        samples_per_shard = total_examples // num_shards

        for shard in range(num_shards):
            start_index = shard * samples_per_shard
            end_index = start_index + samples_per_shard
            _process_examples(start_index, end_index, _data_path(data_directory, f'{name}-{shard+1}'))

    print()

def convert_to_tf_record(data_directory:str):
    """Convert the TF MNIST Dataset to TFRecord formats
    
    Args:
        data_directory: The directory where the TFRecord files should be stored
    """

    mnist = input_data.read_data_sets(
        "/tmp/tensorflow/mnist/input_data", 
        reshape=False
    )
    
    convert_to(mnist.validation, 'validation', data_directory)
    convert_to(mnist.train, 'train', data_directory, num_shards=10)
    convert_to(mnist.test, 'test', data_directory)
convert_to_tf_record("./mnist")


Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Instructions for updating:
Please write your own downloading logic.
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/tensorflow/mnist/input_data/train-images-idx3-ubyte.gz
Instructions for updating:
Please use tf.data to implement this functionality.
Extracting /tmp/tensorflow/mnist/input_data/train-labels-idx1-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-images-idx3-ubyte.gz
Extracting /tmp/tensorflow/mnist/input_data/t10k-labels-idx1-ubyte.gz
Instructions for updating:
Please use alternatives such as official/mnist/dataset.py from tensorflow/models.
Processing validation data
Processing sample 5000 of 5000
Processing train data
Processing sample 9244 of 100000

In [5]:
def data_input_fn(filenames, batch_size=1000, shuffle=False):
    
    def _parser(record):
        features={
            'label': tf.FixedLenFeature([], tf.int64),
            'image': tf.FixedLenFeature([], tf.string)
        }
        parsed_record = tf.parse_single_example(record, features)
        image = tf.decode_raw(parsed_record['image'], tf.float32)

        label = tf.cast(parsed_record['label'], tf.int32)

        return image, tf.one_hot(label, depth=10)
    
    def _iter():
        dataset = (tf.data.TFRecordDataset(filenames)
            .map(_parser))
        if shuffle:
            dataset = dataset.shuffle(buffer_size=10_000)

        dataset = dataset.repeat(None) # Infinite iterations: let experiment determine num_epochs
        dataset = dataset.batch(batch_size)
        
        iterator = dataset.make_one_shot_iterator()
        return iterator
    
    def _input_fn():        
        iterator = _iter()
        features, labels = iterator.get_next()
        
        return features, labels
    return _iter

In [36]:
image, label = data_input_fn("./mnist/train-1.tfrecords", batch_size=100)()

In [None]:
def bn(x, dim, is_training=True, update_batch_stats=True, collections=None, name="bn"):
    params_shape = (dim,)
    n = tf.to_float(tf.reduce_prod(tf.shape(x)[:-1]))
    axis = list(range(int(tf.shape(x).get_shape().as_list()[0]) - 1))
    mean = tf.reduce_mean(x, axis)
    var = tf.reduce_mean(tf.pow(x - mean, 2.0), axis)
    avg_mean = tf.get_variable(
        name=name + "_mean",
        shape=params_shape,
        initializer=tf.constant_initializer(0.0),
        collections=collections,
        trainable=False
    )

    avg_var = tf.get_variable(
        name=name + "_var",
        shape=params_shape,
        initializer=tf.constant_initializer(1.0),
        collections=collections,
        trainable=False
    )

    gamma = tf.get_variable(
        name=name + "_gamma",
        shape=params_shape,
        initializer=tf.constant_initializer(1.0),
        collections=collections
    )

    beta = tf.get_variable(
        name=name + "_beta",
        shape=params_shape,
        initializer=tf.constant_initializer(0.0),
        collections=collections,
    )

    if is_training:
        avg_mean_assign_op = tf.no_op()
        avg_var_assign_op = tf.no_op()
        if update_batch_stats:
            avg_mean_assign_op = tf.assign(
                avg_mean,
                FLAGS.bn_stats_decay_factor * avg_mean + (1 - FLAGS.bn_stats_decay_factor) * mean)
            avg_var_assign_op = tf.assign(
                avg_var,
                FLAGS.bn_stats_decay_factor * avg_var + (n / (n - 1))
                * (1 - FLAGS.bn_stats_decay_factor) * var)

        with tf.control_dependencies([avg_mean_assign_op, avg_var_assign_op]):
            z = (x - mean) / tf.sqrt(1e-6 + var)
    else:
        z = (x - avg_mean) / tf.sqrt(1e-6 + avg_var)

    return gamma * z + beta


In [6]:
class MLP(object):
    """MLP"""

    def __init__(self, config):
        self.num_classes = 10
        self.var_list = []
        self.init_ops = None
        self.activation = "relu" # config.activation
        self.num_units_fc_layers = [1200, 1200] # config.num_units_fc_layers
        self.batch_norm = True

    def __call__(self, images, is_training=False):
        """Builds model."""
        endpoints = {}
        net = images
        print(net.shape)
        reuse = tf.AUTO_REUSE

        net = tf.layers.flatten(net)

        for i, num_units in enumerate(self.num_units_fc_layers):
            layer_suffix = "layer%d" % i
            with tf.variable_scope(os.path.join("mnist_network", "fc_" + layer_suffix), reuse=reuse):
                net = tf.layers.dense(
                    net,
                    num_units,
                    activation=self.activation,
                    use_bias=True)

            endpoints["fc_" + layer_suffix] = net

        with tf.variable_scope(os.path.join("mnist_network", "output_layer"), reuse=reuse):
            logits = tf.layers.dense(
                net,
                self.num_classes,
                activation=None)
        endpoints["logits"] = net

        return logits, endpoints

In [7]:
import os
import tensorflow as tf
import tensorflow.contrib.eager as tfe
from tensorflow.examples.tutorials.mnist import input_data



def softmax_model(image_batch):
    model = MLP(None)
    logits, endpoints = model(image_batch, is_training=True)
    model_output = tf.nn.softmax(logits)
    return model_output


def cross_entropy(model_output, label_batch):
    loss = tf.reduce_mean(
        -tf.reduce_sum(label_batch * tf.log(model_output),
        reduction_indices=[1]))
    return loss


@tfe.implicit_value_and_gradients
def cal_gradient(image_batch, label_batch):
    return cross_entropy(softmax_model(image_batch), label_batch)

In [15]:
model = MLP(None)
logits, endpoints = model(image_batch, is_training=True)


(100, 784)


In [19]:
for layer in layers_list:
    print(layer)
    # break

tf.Tensor(
[[0.18038413 0.22659564 0.         ... 0.         0.         0.3759577 ]
 [0.36179423 0.36371958 0.         ... 0.         0.         0.        ]
 [0.20473713 0.3723094  0.         ... 0.         0.1492198  0.        ]
 ...
 [0.25645143 0.6003772  0.         ... 0.         0.13306507 0.        ]
 [0.01401306 0.30898386 0.         ... 0.         0.         0.        ]
 [0.05506128 0.23868443 0.00085331 ... 0.         0.         0.29849178]], shape=(100, 1200), dtype=float32)
tf.Tensor(
[[0.06070615 0.04257288 0.05172792 ... 0.29537678 0.         0.1104888 ]
 [0.         0.         0.25382587 ... 0.         0.14778559 0.        ]
 [0.         0.04479885 0.07401633 ... 0.         0.         0.        ]
 ...
 [0.16355065 0.         0.         ... 0.26173037 0.         0.        ]
 [0.02450862 0.         0.         ... 0.05797724 0.         0.04237015]
 [0.         0.10344929 0.         ... 0.4595394  0.00108536 0.09780887]], shape=(100, 1200), dtype=float32)
tf.Tensor(
[[0.06070

In [18]:
model = MLP(None)

one_hot_labels = label_batch
top_k = 1
with tf.GradientTape() as t:
    logits, endpoints = model(image_batch, is_training=True)
    layers_list = [endpoints[e] for e in endpoints]
    class_prob = tf.nn.softmax(logits)
    # Pick the correct class probability.
    correct_class_prob = tf.reduce_sum(class_prob * one_hot_labels, axis=1, keepdims=True)

    # Class probabilities except the correct.
    other_class_prob = class_prob * (1. - one_hot_labels)
    if top_k > 1:
        # Pick the top k class probabilities other than the correct.
        top_k_class_prob, _ = tf.nn.top_k(other_class_prob, k=top_k)
    else:
        top_k_class_prob = tf.reduce_max(other_class_prob, axis=1, keepdims=True)

    # Difference between correct class probailities and top_k probabilities.
    difference_prob = correct_class_prob - top_k_class_prob
    losses_list = []
    for layer in layers_list:
        difference_prob_grad = [
            tf.layers.flatten(t.gradient(difference_prob[:, i], layer)[0])
            for i in range(top_k)
        ]

(100, 784)


ValueError: Input 0 of layer flatten_9 is incompatible with the layer: : expected min_ndim=2, found ndim=1. Full shape received: [1200]

In [58]:
large_margin(logits=logits, one_hot_labels=label_batch, layers_list=endpoints)

RuntimeError: tf.gradients is not supported when eager execution is enabled. Use tf.GradientTape instead.

In [55]:

def get_norm_fn(norm_type):
    norm_fn = lambda x: tf.norm(x, ord=norm_type)
    return norm_fn


def maximum_with_relu(a, b):
    return a + tf.nn.relu(b - a)


def _ensure_large_margin_args(name, sentinel, one_hot_labels, logits, layers_list, dist_norm, loss_type):
    """Ensures arguments are correct."""
    # Make sure that all arguments were passed as named arguments.
    if sentinel is not None:
        raise ValueError(
            "Only call `%s` with "
            "named arguments (one_hot_labels=..., logits=..., ...)" % name)
    if one_hot_labels is None or logits is None or not layers_list:
        raise ValueError("logits, one_hot_labels and layers_list must be provided.")

    if dist_norm not in {1, 2, np.inf}:
        raise ValueError("dist_norm must be 1, 2, or np.inf.")

    if loss_type not in {"all_top_k", "average_top_k", "worst_top_k"}:
        raise ValueError(
            "loss_type must be 'all_top_k', 'average_top_k', or 'worst_top_k'.")


# pylint: disable=invalid-name
def large_margin(_sentinel=None, logits=None, one_hot_labels=None, layers_list=None, gamma=10000, alpha_factor=2, top_k=1, dist_norm=2,
                 epsilon=1e-8, use_approximation=True, loss_type="all_top_k", loss_collection=tf.GraphKeys.LOSSES):
    """Creates a large margin loss.

    Args:
        _sentinel: Used to prevent positional parameters. Internal, do not use.
        logits: Float `[batch_size, num_classes]` logits outputs of the network.
        one_hot_labels: `[batch_size, num_classes]` Target integer labels in `{0,
            1}`.
        layers_list: List of network Tensors at different layers. The large margin
            is enforced at the layers specified.
        gamma: Desired margin, and distance to boundary above the margin will be
            clipped.
        alpha_factor: Factor to determine the lower bound of margin. Both gamma and
            alpha_factor determine points to include in training the margin these
            points lie with distance to boundary of [gamma * (1 - alpha), gamma]
        top_k: Number of top classes to include in the margin loss.
        dist_norm: Distance to boundary defined on norm (options: be 1, 2, np.inf).
        epsilon: Small number to avoid division by 0.
        use_approximation: If true, use approximation of the margin gradient for
            less computationally expensive training.
        loss_type: 'worst_top_k', 'average_top_k', or 'all_top_k'. If 'worst_top_k'
            only consider the minimum distance to boundary of the top_k classes. If
            'average_top_k' consider average distance to boundary. If 'all_top_k'
            consider all top_k. When top_k = 1, these choices are equivalent.
        loss_collection: Collection to which the loss will be added.

    Returns:
        loss: Scalar `Tensor` of the same type as `logits`.
    Raises:
        ValueError: If the shape of `logits` doesn't match that of
            `one_hot_labels`.    Also if `one_hot_labels` or `logits` is None.
    """

    _ensure_large_margin_args("large_margin", _sentinel, one_hot_labels, logits, layers_list, dist_norm, loss_type)
    logits = tf.convert_to_tensor(logits)
    one_hot_labels = tf.cast(one_hot_labels, logits.dtype)
    logits.get_shape().assert_is_compatible_with(one_hot_labels.get_shape())
    assert top_k > 0
    assert top_k <= logits.get_shape()[1]

    dual_norm = {1: np.inf, 2: 2, np.inf: 1}
    norm_fn = get_norm_fn(dual_norm[dist_norm])
    with tf.name_scope("large_margin_loss"):
        class_prob = tf.nn.softmax(logits)
        # Pick the correct class probability.
        correct_class_prob = tf.reduce_sum(class_prob * one_hot_labels, axis=1, keepdims=True)

        # Class probabilities except the correct.
        other_class_prob = class_prob * (1. - one_hot_labels)
        if top_k > 1:
            # Pick the top k class probabilities other than the correct.
            top_k_class_prob, _ = tf.nn.top_k(other_class_prob, k=top_k)
        else:
            top_k_class_prob = tf.reduce_max(other_class_prob, axis=1, keepdims=True)

        # Difference between correct class probailities and top_k probabilities.
        difference_prob = correct_class_prob - top_k_class_prob
        losses_list = []
        for layer in layers_list:
            difference_prob_grad = [
                tf.layers.flatten(tf.gradients(difference_prob[:, i], layer)[0])
                for i in range(top_k)
            ]

            difference_prob_gradnorm = tf.concat([
                tf.map_fn(norm_fn, difference_prob_grad[i])[:, tf.newaxis]
                for i in range(top_k)
            ], axis=1)

            if use_approximation:
                difference_prob_gradnorm = tf.stop_gradient(difference_prob_gradnorm)

            distance_to_boundary = difference_prob / (difference_prob_gradnorm + epsilon)

            if loss_type == "worst_top_k":
                # Only consider worst distance to boundary.
                distance_to_boundary = tf.reduce_min(distance_to_boundary, axis=1)

            elif loss_type == "average_top_k":
                # Only consider average distance to boundary.
                distance_to_boundary = tf.reduce_mean(distance_to_boundary, axis=1)

            # Distances to consider between distance_upper and distance_lower bounds
            distance_upper = gamma
            distance_lower = gamma * (1 - alpha_factor)

            # Enforce lower bound.
            loss_layer = maximum_with_relu(distance_to_boundary, distance_lower)

            # Enforce upper bound.
            loss_layer = maximum_with_relu(
                0, distance_upper - loss_layer) - distance_upper

            losses_list.append(tf.reduce_mean(loss_layer))

        loss = tf.reduce_mean(losses_list)
        # Add loss to loss_collection.
        tf.losses.add_loss(loss, loss_collection)
    return loss

In [8]:

data = input_data.read_data_sets("/home/xyang2/project/data/dataset/mnist/", one_hot=True)
train_ds = tf.data.Dataset.from_tensor_slices((data.train.images, data.train.labels))\
    .map(lambda x, y: (x, tf.cast(y, tf.float32)))\
    .shuffle(buffer_size=1000)\
    .batch(100)\

optimizer = tf.train.GradientDescentOptimizer(0.5)

for step, (image_batch, label_batch) in enumerate(tfe.Iterator(train_ds)):
    print(image_batch.shape)
    loss, grads_and_vars = cal_gradient(image_batch, label_batch)
    optimizer.apply_gradients(grads_and_vars)
    if step % 100:
        print("step: {}  loss: {}".format(step, loss.numpy()))
    break

model_test_output = softmax_model(data.test.images)
model_test_label = data.test.labels
correct_prediction = tf.equal(tf.argmax(model_test_output, 1), tf.argmax(model_test_label, 1))
accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

print("test accuracy = {}".format(accuracy.numpy()))


Extracting /home/xyang2/project/data/dataset/mnist/train-images-idx3-ubyte.gz
Extracting /home/xyang2/project/data/dataset/mnist/train-labels-idx1-ubyte.gz
Instructions for updating:
Please use tf.one_hot on tensors.
Extracting /home/xyang2/project/data/dataset/mnist/t10k-images-idx3-ubyte.gz
Extracting /home/xyang2/project/data/dataset/mnist/t10k-labels-idx1-ubyte.gz
(100, 784)
(100, 784)
(10000, 784)


AttributeError: 'tuple' object has no attribute 'ndims'