In [1]:
import tensorflow as tf
import numpy as np
import tensorflow.keras.layers as nn
import os
from riptide.utils.preprocessing.cifarnet_preprocessing import preprocess_image

from matplotlib import pyplot as plt
%matplotlib inline

In [2]:
@tf.custom_gradient
def AlphaClip(x, alpha):
    """Clip `x` to the range [0, alpha] with a hand-written gradient.

    Args:
        x: Tensor of activations to clip.
        alpha: Upper clipping bound. Its gradient is returned as a single
            summed value, so it is treated as a scalar here.

    Returns:
        The clipped tensor; through `tf.custom_gradient` the backward pass
        yields `[x_grad, alpha_grad]`.
    """
    output = tf.clip_by_value(x, 0, alpha)

    def grad_fn(dy):
        # Build the masks in the dtype of the incoming gradient rather than a
        # hard-coded float32, so the products below never mix dtypes.
        x_grad_mask = tf.cast(tf.logical_and(x >= 0, x <= alpha), dy.dtype)
        alpha_grad_mask = tf.cast(x >= alpha, dy.dtype)
        # Elements at or above the bound route their gradient to alpha,
        # accumulated over every position.
        alpha_grad = tf.reduce_sum(dy * alpha_grad_mask)
        # Elements inside [0, alpha] pass their gradient straight to x.
        x_grad = dy * x_grad_mask

        return [x_grad, alpha_grad]

    return output, grad_fn

@tf.custom_gradient
def AlphaQuantize(x, alpha, bits):
    """Round `x` onto a grid of `2**bits - 1` steps spanning `alpha`.

    The forward pass scales `x` by `(2**bits - 1) / alpha`, rounds, and scales
    back. The backward pass hands the upstream gradient to `x` unchanged and
    reports no gradient for `alpha` or `bits`.
    """
    quantized = tf.round(x * ((2**bits - 1) / alpha))
    output = quantized * (alpha / (2**bits - 1))

    def pass_through(upstream):
        # Rounding is skipped in the backward pass: only x receives a gradient.
        return [upstream, None, None]

    return output, pass_through

class PACT(tf.keras.layers.Layer):
    """Activation layer that clips inputs to [0, alpha] with a trainable alpha.

    When `quantize` is true the clipped activations are additionally passed
    through `AlphaQuantize` using `bits`, and histograms of the raw and
    quantized activations are recorded.

    Args:
        quantize: Whether to quantize the clipped activations.
        bits: Bit width handed to `AlphaQuantize`.
        **kwargs: Standard Keras layer arguments (e.g. `name`), forwarded to
            the base class so the layer can be rebuilt from its config.
    """

    def __init__(self, quantize=False, bits=2., **kwargs):
        super(PACT, self).__init__(**kwargs)
        self.quantize = quantize
        self.bits = bits

    def build(self, input_shape):
        """Create the scalar clipping bound `alpha`, initialised to 10."""
        # add_weight replaces the deprecated add_variable alias; the scalar
        # initial value matches the scalar shape of the weight.
        self.alpha = self.add_weight(
            name='alpha',
            shape=[],
            dtype=tf.float32,
            initializer=tf.keras.initializers.Constant(10.),
            regularizer=tf.keras.regularizers.l2(0.0002))

    def call(self, inputs):
        """Clip `inputs` with `AlphaClip` and optionally quantize the result."""
        outputs = AlphaClip(inputs, self.alpha)
        if self.quantize:
            with tf.name_scope('QA'):
                outputs = AlphaQuantize(outputs, self.alpha, self.bits)
                tf.summary.histogram('activation', inputs)
                tf.summary.histogram('quantized_activation', outputs)
        return outputs

    def get_config(self):
        """Return the base layer config extended with `quantize` and `bits`."""
        config = super(PACT, self).get_config()
        config.update({'quantize': self.quantize, 'bits': self.bits})
        return config

    def compute_output_shape(self, input_shape):
        """The layer is element-wise, so the shape is returned unchanged."""
        return input_shape

In [3]:
def get_sawb_coefficients(bits):
    """Look up the two SAWB scale coefficients for a given bit width.

    Args:
        bits: Bit width; converted with `int()`, so 2 and 2.0 are equivalent.

    Returns:
        A two-element list `[c1, c2]`.

    Raises:
        AssertionError: If the bit width is outside the supported range 1..4.
    """
    bits = int(bits)
    coefficient_dict = {1: [0., 1.], 2: [3.19, -2.14], 3: [7.40, -6.66], 4: [11.86, -11.68]}
    # Validate both ends of the range explicitly. AssertionError is kept so
    # the failure type matches the previous assert-based check, but raising it
    # directly means the check survives `python -O` and widths below 1 no
    # longer surface as a bare KeyError.
    if bits not in coefficient_dict:
        raise AssertionError(
            "Currently only supports bitwidths from 1 to 4, got %d." % bits)
    return coefficient_dict[bits]

@tf.custom_gradient
def SAWBQuantize(x, alpha, bits):
    """Quantize `x` symmetrically inside [-alpha, alpha].

    Forward: clip to [-alpha, alpha], map to [0, alpha], round onto a grid of
    `2**bits - 1` steps, then map back to the signed range. Backward: the
    upstream gradient is returned for `x` as-is; `alpha` and `bits` get None.
    """
    # Limit the values to the symmetric range.
    bounded = tf.clip_by_value(x, -alpha, alpha)
    # Shift and halve so the values lie in [0, alpha].
    shifted = (bounded + alpha) / 2.
    # Snap onto the quantization grid.
    snapped = tf.round(shifted * ((2**bits - 1) / alpha)) * (alpha / (2**bits - 1))
    # Undo the shift to recover the signed range.
    output = (2 * snapped) - alpha

    def pass_through(upstream):
        return [upstream, None, None]

    return output, pass_through

class SAWBConv2D(tf.keras.layers.Conv2D):
    """Conv2D variant whose kernel is quantized with `SAWBQuantize` on each call.

    Args:
        *args: Positional arguments forwarded to `tf.keras.layers.Conv2D`.
        bits: Bit width used for the kernel quantization (stored as a float).
        **kwargs: Keyword arguments forwarded to `tf.keras.layers.Conv2D`.
    """

    def __init__(self, *args, bits=2., **kwargs):
        super(SAWBConv2D, self).__init__(*args, **kwargs)
        self.bits = float(bits)
        self.c1, self.c2 = get_sawb_coefficients(bits)
        # Most recent clipping scale computed in call(); None until then.
        self.alpha = None

    def call(self, inputs):
        """Quantize the kernel, convolve, then apply bias and activation."""
        # Scale for the weights: c1 * sqrt(mean(w^2)) + c2 * mean(|w|).
        kernel_rms = tf.sqrt(tf.reduce_mean(self.kernel**2))
        kernel_mean_abs = tf.reduce_mean(tf.abs(self.kernel))
        alpha = self.c1 * kernel_rms + self.c2 * kernel_mean_abs
        self.alpha = alpha

        # Quantize the kernel and log both versions.
        with tf.name_scope("QW"):
            q_kernel = SAWBQuantize(self.kernel, alpha, self.bits)
            tf.summary.histogram("weight", self.kernel)
            tf.summary.histogram("quantized_weight", q_kernel)

        # Reuse the parent layer's convolution op with the quantized kernel.
        outputs = self._convolution_op(inputs, q_kernel)

        if self.use_bias:
            bias_format = 'NCHW' if self.data_format == 'channels_first' else 'NHWC'
            outputs = tf.nn.bias_add(outputs, self.bias, data_format=bias_format)

        if self.activation is not None:
            outputs = self.activation(outputs)

        return outputs

    def get_config(self):
        """Return the Conv2D config extended with `bits` so it round-trips."""
        config = super(SAWBConv2D, self).get_config()
        config['bits'] = self.bits
        return config

In [4]:
def preprocess(image, label):
    """Augment one image: pad/crop to 40x40, random 32x32x3 crop, random flip.

    Args:
        image: Image tensor to augment.
        label: Passed through untouched.

    Returns:
        The `(augmented_image, label)` pair.
    """
    # The TF1 endpoints are reached through tf.compat.v1, like the other
    # v1-only symbols in this notebook, so the function does not depend on
    # names that only exist at the top level of TF 1.x.
    image = tf.compat.v1.image.resize_image_with_crop_or_pad(image, 40, 40)
    image = tf.compat.v1.random_crop(image, [32, 32, 3])
    image = tf.image.random_flip_left_right(image)
    return image, label

In [5]:
# Number of examples per batch; read by both input functions and by the
# steps-per-epoch computation in the estimator setup cell.
batch_size = 128
# CIFAR-10 train/test arrays as returned by the Keras dataset loader.
(train_data, train_labels), (test_data, test_labels) = tf.keras.datasets.cifar10.load_data()

# Convert the labels to int32 before they are fed into the tf.data pipelines.
train_labels = train_labels.astype(np.int32)
test_labels = test_labels.astype(np.int32)

# Sentinel asking tf.data to choose parallelism / prefetch depth on its own.
AUTOTUNE = tf.data.experimental.AUTOTUNE

def train_transform(data, label):
    """Run `preprocess_image` in training mode at 32x32; the label is unchanged."""
    return preprocess_image(data, 32, 32, is_training=True), label

def test_transform(data, label):
    """Run `preprocess_image` in evaluation mode at 32x32; the label is unchanged."""
    return preprocess_image(data, 32, 32, is_training=False), label

def train_input_fn():
    """Build the endlessly repeating, shuffled, batched training dataset.

    Returns:
        A `tf.data.Dataset` yielding `(images, labels)` batches of
        `batch_size` examples transformed by `train_transform`.
    """
    ds = tf.data.Dataset.from_tensor_slices((train_data, train_labels))
    # Shuffle over the whole in-memory training set so every epoch is a full
    # permutation. The earlier prefetch ahead of the shuffle was dropped: the
    # source is already in memory, so it bought nothing.
    ds = ds.shuffle(len(train_data))
    ds = ds.repeat()
    ds = ds.map(train_transform, num_parallel_calls=AUTOTUNE)
    ds = ds.batch(batch_size)
    # Overlap preprocessing of the next batches with the training step.
    ds = ds.prefetch(AUTOTUNE)
    return ds

def test_input_fn():
    """Build the single-pass, batched evaluation dataset.

    Returns:
        A `tf.data.Dataset` yielding `(images, labels)` batches of
        `batch_size` examples transformed by `test_transform`.
    """
    ds = tf.data.Dataset.from_tensor_slices((test_data, test_labels))
    ds = ds.map(test_transform, num_parallel_calls=AUTOTUNE)
    ds = ds.batch(batch_size)
    # A dataset is iterated exactly once by default, so the former no-op
    # repeat(1) is gone; prefetch mirrors the training pipeline.
    ds = ds.prefetch(AUTOTUNE)
    return ds

In [6]:
# Layer layouts keyed by architecture name and consumed by VGG._make_layers:
# an integer is the filter count of a 3x3 convolution, 'M' is a 2x2 max-pool.
cfg = {
    'VGG11': [64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG13': [64, 64, 'M', 128, 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M'],
    'VGG16': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 'M', 512, 512, 512, 'M', 512, 512, 512, 'M'],
    'VGG19': [64, 64, 'M', 128, 128, 'M', 256, 256, 256, 256, 'M', 512, 512, 512, 512, 'M', 512, 512, 512, 512, 'M'],
}

class VGG(tf.keras.models.Model):
    """VGG-style 10-class classifier assembled from an entry of the `cfg` table.

    Args:
        name: Key into the module-level `cfg` dict (e.g. 'VGG11') selecting
            the layer layout.
        *args, **kwargs: Forwarded to `tf.keras.models.Model`.
    """

    def __init__(self, name, *args, **kwargs):
        super(VGG, self).__init__(*args, **kwargs)
        # Single L2 penalty shared by every convolution and the classifier.
        self.reg = tf.keras.regularizers.l2(0.0002)
        self.features = self._make_layers(cfg[name])
        self.flatten = nn.Flatten()
        self.classifier = nn.Dense(10, activation='softmax', kernel_regularizer=self.reg)

    def call(self, inputs, training=True):
        """Return softmax class probabilities for a batch of images."""
        features = self.features(inputs, training=training)
        features = self.flatten(features)
        output = self.classifier(features)

        return output

    def _make_layers(self, cfg):
        """Translate a layout list into a Sequential feature extractor.

        Integers become conv -> batch-norm -> ReLU blocks, 'M' becomes a 2x2
        max-pool, and a global average pool closes the stack.
        """
        def conv_block(filters):
            # Every convolution carries the kernel regularizer. Previously
            # only the first one did, so the remaining convolutions were left
            # out of the weight-decay term.
            # For the quantized variant swap in
            # SAWBConv2D(filters, kernel_size=3, padding='same',
            #            kernel_regularizer=self.reg) and PACT(quantize=True).
            return [nn.Conv2D(filters, kernel_size=3, padding='same',
                              kernel_regularizer=self.reg),
                    nn.BatchNormalization(),
                    nn.Activation('relu')]

        layers = conv_block(cfg[0])
        for x in cfg[1:]:
            if x == 'M':
                layers.append(nn.MaxPool2D(pool_size=2, strides=2))
            else:
                layers.extend(conv_block(x))
        layers.append(nn.GlobalAveragePooling2D())

        return tf.keras.models.Sequential(layers)

In [7]:
# Scratch inspection: evaluating the bare name only displays the signature of
# the summary function that model_fn calls; it has no effect on training.
tf.compat.v1.summary.image

<function tensorflow.python.summary.summary.image(name, tensor, max_outputs=3, collections=None, family=None)>

In [8]:
def model_fn(features, labels, mode):
    """Estimator model function wrapping the VGG11 network.

    Args:
        features: Batch of input images.
        labels: Batch of integer class labels; not used in PREDICT mode.
        mode: One of the `tf.estimator.ModeKeys` values.

    Returns:
        A `tf.estimator.EstimatorSpec` for the requested mode.
    """
    tf.compat.v1.summary.image('images', features, max_outputs=4)
    model = VGG('VGG11')

    training = (mode == tf.estimator.ModeKeys.TRAIN)
    predictions = model(features, training=training)

    # Prediction has no labels, so return before any loss or metric is built.
    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    loss_fn = tf.keras.losses.SparseCategoricalCrossentropy()
    # Collect the regularization losses registered on the model's layers.
    reg_losses = model.get_losses_for(None) + model.get_losses_for(features)
    total_loss = loss_fn(labels, predictions)
    if reg_losses:
        total_loss += tf.math.add_n(reg_losses)

    accuracy = tf.compat.v1.metrics.accuracy(labels=labels,
                                             predictions=tf.math.argmax(predictions, axis=-1),
                                             name='acc_op')

    # The optimizer and its update op are only needed when training.
    train_op = None
    if training:
        optimizer = tf.compat.v1.train.AdamOptimizer()
        # Run the model's registered update ops together with each optimizer step.
        update_ops = model.get_updates_for(None) + model.get_updates_for(features)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.minimize(
                total_loss,
                var_list=model.trainable_variables,
                global_step=tf.compat.v1.train.get_or_create_global_step())

    return tf.estimator.EstimatorSpec(
        mode=mode,
        predictions=predictions,
        loss=total_loss,
        train_op=train_op,
        eval_metric_ops={'accuracy': accuracy})

In [9]:
# Name of this run; also the last component of the checkpoint directory.
experiment_name = 'vgg_baseline_full5'
# NOTE(review): absolute, user-specific location; adjust when running elsewhere.
model_path = os.path.join('/data', 'jwfromm', 'cifar_models', experiment_name)

tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)

# Let TensorFlow grow its GPU memory allocation on demand.
sessconfig = tf.compat.v1.ConfigProto()
sessconfig.gpu_options.allow_growth=True

runconfig = tf.estimator.RunConfig(session_config=sessconfig)

classifier = tf.estimator.Estimator(
    model_fn=model_fn, model_dir=model_path, config=runconfig)

# Steps per epoch, as a plain int so max_steps below is an integer step count
# rather than a numpy float.
NUM_STEPS = int(np.ceil(len(train_data) / batch_size))
EPOCHS = 200

train_spec = tf.estimator.TrainSpec(
    input_fn=train_input_fn, max_steps=NUM_STEPS * EPOCHS)
eval_spec = tf.estimator.EvalSpec(input_fn=test_input_fn)

I0411 01:08:14.101528 139774117242624 estimator.py:202] Using config: {'_keep_checkpoint_every_n_hours': 10000, '_device_fn': None, '_eval_distribute': None, '_save_summary_steps': 100, '_task_id': 0, '_global_id_in_cluster': 0, '_model_dir': '/data/jwfromm/cifar_models/vgg_baseline_full5', '_master': '', '_service': None, '_is_chief': True, '_save_checkpoints_steps': None, '_train_distribute': None, '_experimental_distribute': None, '_keep_checkpoint_max': 5, '_tf_random_seed': None, '_num_ps_replicas': 0, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f1f13abe2b0>, '_protocol': None, '_num_worker_replicas': 1, '_task_type': 'worker', '_log_step_count_steps': 100, '_evaluation_master': '', '_session_config': gpu_options {
  allow_growth: true
}
, '_save_checkpoints_secs': 600}


In [10]:
# Start the train-and-evaluate loop for `classifier` using the specs built in
# the previous cell.
# NOTE(review): the saved traceback below reports `ds.shuffle(AUTOTUNE)`, which
# does not match the current train_input_fn; re-run to refresh the output.
tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec)

I0411 01:08:14.160732 139774117242624 estimator_training.py:186] Not using Distribute Coordinator.
I0411 01:08:14.162415 139774117242624 training.py:612] Running training and evaluation locally (non-distributed).
I0411 01:08:14.164015 139774117242624 training.py:700] Start train and evaluate loop. The evaluate will happen after every checkpoint. Checkpoint frequency is determined based on RunConfig arguments: save_checkpoints_steps None or save_checkpoints_secs 600.
W0411 01:08:14.191110 139774117242624 deprecation.py:323] From /usr/local/lib/python3.5/dist-packages/tensorflow/python/training/training_util.py:238: Variable.initialized_value (from tensorflow.python.ops.variables) is deprecated and will be removed in a future version.
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.
W0411 01:08:15.541453 139774117242624 deprecation.py:323] From /usr/local/lib/python3.5/dist-packages/tens

InvalidArgumentError: buffer_size must be greater than zero.
	 [[node ShuffleDataset (defined at <ipython-input-5-82ceecc69bf2>:20) ]]

Errors may have originated from an input operation.
Input Source operations connected to node ShuffleDataset:
 PrefetchDataset (defined at <ipython-input-5-82ceecc69bf2>:19)

Original stack trace for 'ShuffleDataset':
  File "/usr/lib/python3.5/runpy.py", line 184, in _run_module_as_main
    "__main__", mod_spec)
  File "/usr/lib/python3.5/runpy.py", line 85, in _run_code
    exec(code, run_globals)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel_launcher.py", line 16, in <module>
    app.launch_new_instance()
  File "/usr/local/lib/python3.5/dist-packages/traitlets/config/application.py", line 658, in launch_instance
    app.start()
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelapp.py", line 505, in start
    self.io_loop.start()
  File "/usr/local/lib/python3.5/dist-packages/tornado/platform/asyncio.py", line 148, in start
    self.asyncio_loop.run_forever()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 345, in run_forever
    self._run_once()
  File "/usr/lib/python3.5/asyncio/base_events.py", line 1312, in _run_once
    handle._run()
  File "/usr/lib/python3.5/asyncio/events.py", line 125, in _run
    self._callback(*self._args)
  File "/usr/local/lib/python3.5/dist-packages/tornado/ioloop.py", line 690, in <lambda>
    lambda f: self._run_callback(functools.partial(callback, future))
  File "/usr/local/lib/python3.5/dist-packages/tornado/ioloop.py", line 743, in _run_callback
    ret = callback()
  File "/usr/local/lib/python3.5/dist-packages/tornado/gen.py", line 781, in inner
    self.run()
  File "/usr/local/lib/python3.5/dist-packages/tornado/gen.py", line 742, in run
    yielded = self.gen.send(value)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 370, in dispatch_queue
    yield self.process_one()
  File "/usr/local/lib/python3.5/dist-packages/tornado/gen.py", line 225, in wrapper
    runner = Runner(result, future, yielded)
  File "/usr/local/lib/python3.5/dist-packages/tornado/gen.py", line 708, in __init__
    self.run()
  File "/usr/local/lib/python3.5/dist-packages/tornado/gen.py", line 742, in run
    yielded = self.gen.send(value)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 357, in process_one
    yield gen.maybe_future(dispatch(*args))
  File "/usr/local/lib/python3.5/dist-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 267, in dispatch_shell
    yield gen.maybe_future(handler(stream, idents, msg))
  File "/usr/local/lib/python3.5/dist-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/kernelbase.py", line 534, in execute_request
    user_expressions, allow_stdin,
  File "/usr/local/lib/python3.5/dist-packages/tornado/gen.py", line 209, in wrapper
    yielded = next(result)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/ipkernel.py", line 294, in do_execute
    res = shell.run_cell(code, store_history=store_history, silent=silent)
  File "/usr/local/lib/python3.5/dist-packages/ipykernel/zmqshell.py", line 536, in run_cell
    return super(ZMQInteractiveShell, self).run_cell(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2848, in run_cell
    raw_cell, store_history, silent, shell_futures)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 2874, in _run_cell
    return runner(coro)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/async_helpers.py", line 67, in _pseudo_sync_runner
    coro.send(None)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 3049, in run_cell_async
    interactivity=interactivity, compiler=compiler, result=result)
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 3220, in run_ast_nodes
    if (yield from self.run_code(code, result)):
  File "/usr/local/lib/python3.5/dist-packages/IPython/core/interactiveshell.py", line 3296, in run_code
    exec(code_obj, self.user_global_ns, self.user_ns)
  File "<ipython-input-10-32f2c42a6c68>", line 1, in <module>
    tf.estimator.train_and_evaluate(classifier, train_spec, eval_spec)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/training.py", line 473, in train_and_evaluate
    return executor.run()
  File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/training.py", line 613, in run
    return self.run_local()
  File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/training.py", line 714, in run_local
    saving_listeners=saving_listeners)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 359, in train
    loss = self._train_model(input_fn, hooks, saving_listeners)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1139, in _train_model
    return self._train_model_default(input_fn, hooks, saving_listeners)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1166, in _train_model_default
    input_fn, ModeKeys.TRAIN))
  File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1003, in _get_features_and_labels_from_input_fn
    self._call_input_fn(input_fn, mode))
  File "/usr/local/lib/python3.5/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1094, in _call_input_fn
    return input_fn(**kwargs)
  File "<ipython-input-5-82ceecc69bf2>", line 20, in train_input_fn
    ds = ds.shuffle(AUTOTUNE)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 741, in shuffle
    return ShuffleDataset(self, buffer_size, seed, reshuffle_each_iteration)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/data/ops/dataset_ops.py", line 2694, in __init__
    **flat_structure(self))
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/ops/gen_dataset_ops.py", line 4763, in shuffle_dataset
    name=name)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/op_def_library.py", line 800, in _apply_op_helper
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
    return func(*args, **kwargs)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 3479, in create_op
    op_def=op_def)
  File "/usr/local/lib/python3.5/dist-packages/tensorflow/python/framework/ops.py", line 1961, in __init__
    self._traceback = tf_stack.extract_stack()
