In [1]:
import tensorflow as tf
import numpy as np
import tensorflow.keras.layers as nn

# Switch TensorFlow to eager execution before any ops are created, so the
# cells below can call layers directly and display concrete tensor values.
tf.enable_eager_execution()

In [2]:
@tf.custom_gradient
def AlphaClip(x, alpha):
    """Clip ``x`` to ``[0, alpha]`` with a hand-written gradient for both inputs.

    Args:
        x: Tensor of activations to clip.
        alpha: Upper clipping bound. Its gradient is reduced to a scalar, so
            it is expected to be a scalar tensor/variable.

    Returns:
        ``tf.clip_by_value(x, 0, alpha)``. ``tf.custom_gradient`` pairs it with
        ``grad_fn`` below as the backward pass.
    """
    output = tf.clip_by_value(x, 0, alpha)

    def grad_fn(dy):
        """Split the upstream gradient ``dy`` between ``x`` and ``alpha``."""
        # Elements at or above the clip level output alpha itself, so their
        # upstream gradient belongs to alpha.
        saturated = x >= alpha
        # Strict upper bound: an element sitting exactly on alpha must not be
        # counted for both x and alpha.
        passthrough = tf.logical_and(x >= 0, x < alpha)

        # Cast to dy's dtype (not a fixed float32) so the products below are
        # valid for any floating-point activation type.
        x_grad = dy * tf.cast(passthrough, dy.dtype)
        alpha_grad = tf.reduce_sum(dy * tf.cast(saturated, dy.dtype))

        return [x_grad, alpha_grad]

    return output, grad_fn

@tf.custom_gradient
def AlphaQuantize(x, alpha, bits):
    """Round ``x`` onto a uniform grid with step ``alpha / (2**bits - 1)``.

    Args:
        x: Tensor to quantize.
        alpha: Scale that the grid is expressed in.
        bits: Bit width; the grid has ``2**bits - 1`` steps per ``alpha``.

    Returns:
        The quantized tensor. The backward pass hands the upstream gradient to
        ``x`` unchanged and reports no gradient for ``alpha`` or ``bits``.
    """
    steps = 2**bits - 1
    to_grid = steps / alpha
    from_grid = alpha / steps
    output = tf.round(x * to_grid) * from_grid

    def grad_fn(dy):
        # Rounding is treated as identity for the backward pass.
        return [dy, None, None]

    return output, grad_fn

class PACT(tf.keras.layers.Layer):
    """Activation layer that clips to ``[0, alpha]`` with a trainable ``alpha``.

    The clipped activations can optionally be quantized with ``AlphaQuantize``.

    Args:
        quantize: When true, quantize the clipped activations in ``call``.
        bits: Bit width forwarded to ``AlphaQuantize``.
        **kwargs: Standard ``tf.keras.layers.Layer`` arguments (``name``,
            ``trainable``, ...). Accepting them lets ``from_config`` rebuild
            the layer from the full config produced by ``get_config``.
    """

    def __init__(self, quantize=False, bits=2., **kwargs):
        super(PACT, self).__init__(**kwargs)
        self.quantize = quantize
        self.bits = bits

    def build(self, input_shape):
        """Create the scalar clip level, initialised to 10 with L2 regularization."""
        self.alpha = self.add_weight(
            name='alpha', shape=[],
            # Scalar value to match the scalar variable shape.
            initializer=tf.keras.initializers.Constant(10., dtype=tf.float32),
            regularizer=tf.keras.regularizers.l2(0.01))

    def call(self, inputs):
        """Clip ``inputs`` to ``[0, alpha]`` and, if enabled, quantize the result."""
        outputs = AlphaClip(inputs, self.alpha)
        if self.quantize:
            with tf.name_scope('QA'):
                outputs = AlphaQuantize(outputs, self.alpha, self.bits)
                tf.summary.histogram('quantized_activation', outputs)
        return outputs

    def get_config(self):
        """Return the base layer config extended with ``quantize`` and ``bits``."""
        config = super(PACT, self).get_config()
        config.update({'quantize': self.quantize, 'bits': self.bits})
        return config

    def compute_output_shape(self, input_shape):
        """The layer is element-wise, so the shape passes through unchanged."""
        return input_shape

In [None]:
@tf.custom_gradient
def SAWBQuantize(x, alpha, bits):
    """Quantize ``x`` symmetrically onto ``2**bits`` levels spanning ``[-alpha, alpha]``.

    Args:
        x: Tensor to quantize (the convolution kernel in ``SAWBConv2D``).
        alpha: Clipping scale; values outside ``[-alpha, alpha]`` saturate.
        bits: Bit width of the quantization grid.

    Returns:
        The quantized tensor. The backward pass forwards the upstream gradient
        to ``x`` wherever ``|x| <= alpha`` and reports no gradient for
        ``alpha`` or ``bits``.
    """
    # Clip between -alpha and alpha
    clipped = tf.clip_by_value(x, -alpha, alpha)
    # Rescale to [0, alpha]
    scaled = (clipped + alpha) / 2.
    # Quantize.
    quantized = tf.round(scaled * ((2**bits - 1) / alpha)) * (alpha / (2**bits - 1))
    # Rescale to negative range.
    output = (2 * quantized) - alpha

    def grad_fn(dy):
        # Cast to dy's dtype rather than a fixed float32 so the product is
        # valid when the kernel is not float32.
        x_mask = tf.cast(tf.abs(x) <= alpha, dy.dtype)
        x_grad = dy * x_mask
        return [x_grad, None, None]
    return output, grad_fn

class SAWBConv2D(tf.keras.layers.Conv2D):
    """``Conv2D`` that quantizes its kernel with ``SAWBQuantize`` on every call.

    The quantization scale is recomputed from the kernel each forward pass as
    ``alpha = c1 * sqrt(mean(kernel**2)) + c2 * mean(|kernel|)``.

    Accepts every ``Conv2D`` argument plus three optional keyword arguments:

    Args:
        c1: Coefficient of the root-mean-square term (default ``3.2``).
        c2: Coefficient of the mean-absolute-value term (default ``-2.1``).
        bits: Bit width passed to ``SAWBQuantize`` (default ``2.``).
    """

    def __init__(self, *args, **kwargs):
        # Take the layer-specific options out before Conv2D sees the kwargs;
        # the defaults reproduce the previously hard-coded values.
        c1 = kwargs.pop('c1', 3.2)
        c2 = kwargs.pop('c2', -2.1)
        bits = kwargs.pop('bits', 2.)
        super(SAWBConv2D, self).__init__(*args, **kwargs)
        self.c1 = c1
        self.c2 = c2
        self.bits = bits
        # Scale used by the most recent forward pass; None until first call.
        self.alpha = None

    def call(self, inputs):
        """Convolve ``inputs`` with the quantized kernel, then apply bias/activation."""
        # Compute proper scale for our weights.
        rms = tf.sqrt(tf.reduce_mean(self.kernel**2))
        mean_abs = tf.reduce_mean(tf.abs(self.kernel))
        alpha = self.c1 * rms + self.c2 * mean_abs
        self.alpha = alpha

        # Quantize kernel
        with tf.name_scope("QW"):
            q_kernel = SAWBQuantize(self.kernel, alpha, self.bits)
            tf.summary.histogram("quantized_weight", q_kernel)

        # Invoke convolution. `_convolution_op` is not defined in this class;
        # presumably the parent Conv2D creates it when the layer is built.
        outputs = self._convolution_op(inputs, q_kernel)

        if self.use_bias:
            bias_format = 'NCHW' if self.data_format == 'channels_first' else 'NHWC'
            outputs = tf.nn.bias_add(outputs, self.bias, data_format=bias_format)

        if self.activation is not None:
            outputs = self.activation(outputs)

        return outputs

    def get_config(self):
        """Return the ``Conv2D`` config extended with ``c1``, ``c2`` and ``bits``."""
        config = super(SAWBConv2D, self).get_config()
        config.update({'c1': self.c1, 'c2': self.c2, 'bits': self.bits})
        return config

In [3]:
def _vgg_layout(*stages):
    """Expand ``(channels, repeats)`` stages into a flat layer list.

    Each stage contributes ``repeats`` copies of ``channels`` followed by the
    marker ``'M'``.
    """
    layout = []
    for channels, repeats in stages:
        layout.extend([channels] * repeats)
        layout.append('M')
    return layout


# Layer layouts keyed by variant name: integers are convolution output
# channel counts, 'M' marks a max-pooling step.
cfg = {
    'VGG11': _vgg_layout((64, 1), (128, 1), (256, 2), (512, 2), (512, 2)),
    'VGG13': _vgg_layout((64, 2), (128, 2), (256, 2), (512, 2), (512, 2)),
    'VGG16': _vgg_layout((64, 2), (128, 2), (256, 3), (512, 3), (512, 3)),
    'VGG19': _vgg_layout((64, 2), (128, 2), (256, 4), (512, 4), (512, 4)),
}

class VGG(tf.keras.models.Model):
    """VGG-style network with a 10-way linear classifier on top.

    Args:
        name: Key into the module-level ``cfg`` table selecting the layout.
            It is used only for that lookup and is not forwarded to Keras.
        *args, **kwargs: Passed through to ``tf.keras.models.Model``.
    """

    def __init__(self, name, *args, **kwargs):
        super(VGG, self).__init__(*args, **kwargs)
        layout = cfg[name]
        self.features = self._make_layers(layout)
        self.flatten = nn.Flatten()
        self.classifier = nn.Dense(10, activation=None)

    def call(self, inputs, training=True):
        """Return the classifier outputs (no activation) for ``inputs``."""
        hidden = self.features(inputs, training=training)
        flat = self.flatten(hidden)
        return self.classifier(flat)

    def _make_layers(self, cfg):
        """Build the feature extractor as a ``Sequential`` from a layout list.

        The first entry becomes a regular ``Conv2D`` block. Every later
        integer becomes a ``SAWBConv2D`` block and every ``'M'`` a 2x2
        max-pool. A global average pool closes the stack.
        """
        first_width, remaining = cfg[0], cfg[1:]
        stack = [
            nn.Conv2D(first_width, kernel_size=3, padding='same'),
            nn.BatchNormalization(),
            nn.Activation('relu'),
        ]
        for entry in remaining:
            if entry == 'M':
                stack.append(nn.MaxPool2D(pool_size=2, strides=2))
                continue
            stack.extend([
                SAWBConv2D(entry, kernel_size=3, padding='same'),
                nn.BatchNormalization(),
                nn.Activation('relu'),
            ])
        stack.append(nn.GlobalAveragePooling2D())

        return tf.keras.models.Sequential(stack)

In [4]:
# Build the VGG11 variant and push a single random 32x32x3 input through it
# in inference mode. Presumably this first call is what makes the layers
# create their variables before the checkpoint is restored below.
# NOTE(review): the random input is unseeded, so displayed outputs are not
# reproducible across kernel restarts.
model = VGG('VGG11')
test_input = tf.random_normal(shape=[1, 32, 32, 3])
test_output = model(test_input, training=False)

In [5]:
# Restore trained weights from a name-based checkpoint (see the warning in
# the cell output about name-based restores being fragile).
# NOTE(review): this is a machine-specific absolute path; the notebook will
# not run elsewhere without editing it.
# NOTE(review): the path suggests a PACT-trained checkpoint, while
# `_make_layers` currently builds ReLU activations — confirm the pairing.
model.load_weights('/data/jwfromm/cifar_models/vgg_pact_a2/model.ckpt-23460')

Instructions for updating:
Restoring a name-based tf.train.Saver checkpoint using the object-based restore API. This mode uses global names to match variables, and so is somewhat fragile. It also adds new restore ops to the graph each time it is called when graph building. Prefer re-encoding training checkpoints in the object-based format: run save() on the object-based saver (the same one this message is coming from) and use that checkpoint in the future.


<tensorflow.python.training.checkpointable.util.NameBasedSaverStatus at 0x7f6f998184a8>

In [6]:
# Re-run the same input after restoring weights; displays the 10 output values.
model(test_input, training=False)

<tf.Tensor: id=1511, shape=(1, 10), dtype=float32, numpy=
array([[-6.6998057, -5.364214 , -0.924089 , -3.8044078, -0.9380314,
        -6.679875 ,  4.0117774, -6.212219 , -5.8784456, -4.354133 ]],
      dtype=float32)>

In [13]:
# Inspect intermediate activations: wrap the first three layers of
# `model.layers[0]` (presumably the `features` stack: Conv2D,
# BatchNormalization, ReLU) in a throwaway Sequential and run the test input.
# NOTE(review): no `training=` argument is passed here, unlike the calls above.
# NOTE(review): the displayed values fall on a few discrete levels, which
# plain ReLU would not produce — the saved output may predate the current code.
tf.keras.models.Sequential(model.layers[0].layers[:3])(test_input)

<tf.Tensor: id=1542, shape=(1, 32, 32, 64), dtype=float32, numpy=
array([[[[0.        , 0.        , 0.        , ..., 0.67017335,
          0.        , 0.        ],
         [0.44678223, 0.        , 0.67017335, ..., 0.67017335,
          0.        , 0.22339112],
         [0.67017335, 0.        , 0.67017335, ..., 0.        ,
          0.        , 0.        ],
         ...,
         [0.        , 0.67017335, 0.        , ..., 0.        ,
          0.67017335, 0.        ],
         [0.        , 0.        , 0.        , ..., 0.        ,
          0.67017335, 0.        ],
         [0.        , 0.        , 0.        , ..., 0.67017335,
          0.        , 0.        ]],

        [[0.67017335, 0.        , 0.        , ..., 0.67017335,
          0.67017335, 0.        ],
         [0.        , 0.        , 0.        , ..., 0.        ,
          0.67017335, 0.22339112],
         [0.        , 0.44678223, 0.        , ..., 0.        ,
          0.67017335, 0.22339112],
         ...,
         [0.        , 