In [None]:
#  Steps of the work:

# Make a model with accessible parameters using TensorFlow 2.x
# Write an entropy loss and add it to the network parameters for training.
# Train a model by jointly optimizing its loss (e.g. cross entropy for classification) and the entropy for compressability.
# Compress the model parameters using Huffman coding.
# Analyze the results and trade-off between accuracy and compressability.
# Write a model wrapper that can read and write the compressed parameters.

# Notes:

# Can be started on simple datasets like MNIST.
# Can start from simple MLP NNs. And then extend to convs. The implementation should be generic enough so that it can support any layer.
# Bonus: add support for normalization layers: batch norm, instance norm, etc.

In [1]:
# Load cifar10 data set

# from tensorflow.keras.datasets import mnist
# train_set, test_set = mnist.load_data()
import tensorflow as tf
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()


In [2]:
# import os
# os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' 
# Allocation of 17179869184 exceeds 10% of free system memory.

In [1]:
# generate a simple NN model

from tensorflow.keras.layers import Dense, Input, Flatten
from tensorflow.keras.models import Model
import tensorflow as tf
from tensorflow import keras
import numpy as np
from tensorflow.keras.layers import Conv2D, BatchNormalization, ReLU

    
kernel_initializer = 'he_normal'

def get_model(chs=128):
#     shape=(32, 32, 3)
    shape=(32,32,64)
#         shape=(32,32,128)
#     shape=(16,16,256) 
# shape=(8,8,256)
# shape=(4,4,256)
#     reduce shape increase channel
# dont do batch
# stride
# do step by step
# run by hpc

# different mlp layer, cnn
# 
    
    inputs = Input(shape)
    layer = Conv2D(chs * 2, (3, 3), padding='same', kernel_initializer=kernel_initializer)(inputs)
    layer = BatchNormalization()(layer)
    layer = ReLU()(layer)
    
    layer = Conv2D(chs, (3, 3), padding='same', kernel_initializer=kernel_initializer)(layer)
    layer = BatchNormalization()(layer)
    layer = ReLU()(layer)
    
    layer = Flatten()(layer)
    print(layer)
    output = Dense(10, activation='linear', use_bias=True, kernel_initializer=kernel_initializer)(layer)

    model = Model(inputs, output)
    return model



In [4]:
model.summary()

Model: "model"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 32, 32, 3)]       0         
                                                                 
 conv2d (Conv2D)             (None, 32, 32, 256)       7168      
                                                                 
 batch_normalization (BatchN  (None, 32, 32, 256)      1024      
 ormalization)                                                   
                                                                 
 re_lu (ReLU)                (None, 32, 32, 256)       0         
                                                                 
 conv2d_1 (Conv2D)           (None, 32, 32, 128)       295040    
                                                                 
 batch_normalization_1 (Batc  (None, 32, 32, 128)      512       
 hNormalization)                                             

In [2]:
#     To estimate the histogram, we first remove outliers in the
# activations if the samples are outside the range [μ−3σ; μ+3σ]
from histogram import calculate_histogram, test_histogram, calculate_histogram_range

def calculate_entropy(variables, eps=0.0):
    min_h, max_h = calculate_histogram_range(variables)
    flat_vars = tf.reshape(variables, (-1,1))    
    hist = calculate_histogram(flat_vars, min_h, max_h)
    
    probs = hist / tf.reduce_sum(hist)
    entropy = -tf.reduce_sum(probs * tf.experimental.numpy.log2(probs+eps))
    return entropy, (min_h, max_h)


def calc_sparsity_regularization(inputs, regularization_coefficient=1e-2):
    abs_inputs = tf.abs(inputs)
    l1_norm = tf.reduce_sum(abs_inputs, axis=0)
    num_activations = tf.cast(tf.shape(inputs)[0], dtype=tf.float32)
    regularization_loss = regularization_coefficient / num_activations * tf.reduce_sum(l1_norm)
    
    return regularization_loss

class CompressibleNN(keras.Model):
    def __init__(self, net_model):
        super(CompressibleNN, self).__init__()
        self.net_model = net_model
        self.CE_loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True)

    def call(self, inputs):
        return self.net_model(inputs)
    
    def entropy_loss(self, inputs):
        entropy = 0
        
        for l in self.net_model.layers:
            if isinstance(l, keras.layers.Dense):
                for v in l.trainable_variables:
                    v_entropy, v_range = calculate_entropy(v)
                    entropy += v_entropy
        return entropy
    
    def regularization_loss(self, inputs):
        rg_loss = 0
        
        for l in self.net_model.layers:
            if isinstance(l, keras.layers.Dense):
                for v in l.trainable_variables:
                    v_regularization_loss = calc_sparsity_regularization(v)
                    rg_loss += v_regularization_loss

        return rg_loss
    
    
    def train_step(self, input):
        images = input[0]
        labels = input[1]

        with tf.GradientTape() as tape:
            output = self.net_model(images)
            loss =  self.entropy_loss(images)
            regularization_loss = self.regularization_loss(images)

        # Get the gradients w.r.t the loss
        gradient = tape.gradient(loss, self.net_model.trainable_variables)
#         gradient_reg = tape.gradient(regularization_loss, self.net_model.trainable_variables)
        # Update the weights using the generator optimizer
        self.optimizer.apply_gradients(
            zip(gradient_reg, self.net_model.trainable_variables)
        )
#         return {"regularization loss": regularization_loss}
        return {"loss": loss}
    
model = get_model()

compNN = CompressibleNN(model)
optimizer = tf.optimizers.Adam(learning_rate=1e-3, beta_1=0.9)
compNN.compile(optimizer, loss=keras.losses.SparseCategoricalCrossentropy(from_logits=True))

KerasTensor(type_spec=TensorSpec(shape=(None, 131072), dtype=tf.float32, name=None), name='flatten/Reshape:0', description="created by layer 'flatten'")


In [None]:
# train the model

tf.data.experimental.enable_debug_mode()

tf.config.run_functions_eagerly(True)
# loss needs to be decreased during training
compNN.fit(x=x_train, y=y_train, epochs=1, batch_size=2)

2023-06-26 07:59:14.424835: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1342177280 exceeds 10% of free system memory.
2023-06-26 07:59:15.114143: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1342177280 exceeds 10% of free system memory.
2023-06-26 07:59:16.148544: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1342177280 exceeds 10% of free system memory.
2023-06-26 07:59:17.848082: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1342177280 exceeds 10% of free system memory.
2023-06-26 07:59:20.678313: W tensorflow/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 1342177280 exceeds 10% of free system memory.


In [53]:
# test performance
res = model(test_set[0])
print("test acc: ", 100*(np.argmax(res, axis=1)==test_set[1]).mean())

test acc:  96.02000000000001


In [56]:
# test performance with entropy_loss
res = model(test_set[0])
print("test acc: ", 100*(np.argmax(res, axis=1)==test_set[1]).mean())

test acc:  96.61


In [None]:
 17.7708
 17.7171
 17.6588
 