### Quantization Aware Training With Pruning



In [7]:
import tensorflow as tf

import numpy as np
import tempfile
import zipfile
import os

In [23]:
data = np.load('./data_hh4b_20x12_160000.npz')
train_X = data['train_X']      #data for training the quantized model
train_y = data['train_y']      #data labels
test_X = data['test_X']
test_y = data['test_y']
test_X_hw_hh4b = data['test_X_hw_hh4b']
test_y_hw_hh4b = data['test_y_hw_hh4b']
test_X_hw_snu = data['test_X_hw_snu']
test_y_hw_snu = data['test_y_hw_snu']
data = 0

model = tf.keras.models.load_model('pruning_models/unpruned_train_1_test.h5')   # unpruned modelmodel
stripped_pruned_model = tf.keras.models.load_model('pruning_models/pruned_train_1_test.h5') #pruned model - pruning stripped



In [24]:
def compiler(model_name):
    opt = tf.keras.optimizers.Adam(0.001)
    sensitivity_metric = tf.keras.metrics.SensitivityAtSpecificity(name='sens_at_spec',
                                                                             specificity=0.99925,     
                                                                             num_thresholds=20000)     
    auc_metric = tf.keras.metrics.AUC(name='auc', num_thresholds=200)   
    metrics = ['accuracy', sensitivity_metric, auc_metric]

    model_name.compile(optimizer=opt, loss='binary_crossentropy', metrics=metrics)
    
    return model_name

    


def trainer(model_name, train, savename, stripping):   #function for training and saving models
    if train:
        opt = tf.keras.optimizers.Adam(0.001)
        log_dir = tempfile.mkdtemp()
        sensitivity_metric = tf.keras.metrics.SensitivityAtSpecificity(name='sens_at_spec',
                                                                                 specificity=0.99925,     
                                                                                 num_thresholds=20000)     
        auc_metric = tf.keras.metrics.AUC(name='auc', num_thresholds=200)   
        metrics = ['accuracy', sensitivity_metric, auc_metric]

        model_name.compile(optimizer=opt, loss='binary_crossentropy', metrics=metrics)    

        model_name.fit(train_X, 
                   train_y, 
                   epochs=50, 
                   verbose=1,
                   batch_size=512, 
                   validation_split=.2,   
                   shuffle=True,
                   callbacks=[tf.keras.callbacks.EarlyStopping(monitor='val_loss',
                                                                 patience=5,
                                                                 restore_best_weights=True),     #What does callbacks do?
                                pruning_callbacks.UpdatePruningStep(),
                                tfmot.sparsity.keras.PruningSummaries(log_dir=log_dir)])
            # Save the model again but with the pruning 'stripped' to use the regular layer types
        if stripping:
            model_stripped = strip_pruning(model_name)
            model_stripped.save(savename)
        else:
            model_name.save(savename)
    else:
        model_name = load_model(savename)
        

def print_model_weights_sparsity(model):

    for layer in model.layers:
        if isinstance(layer, tf.keras.layers.Wrapper):
            weights = layer.trainable_weights
        else:
            weights = layer.weights
        for weight in weights:
            # ignore auxiliary quantization weights
            if "quantize_layer" in weight.name:
                continue
            weight_size = weight.numpy().size
            zero_num = np.count_nonzero(weight == 0)
            print(
                f"{weight.name}: {zero_num/weight_size:.2%} sparsity ",
                f"({zero_num}/{weight_size})",
            )

In [34]:
#print_model_weights_sparsity(model)
#print_model_weights_sparsity(pruned_model)
compiler(model)
compiler(stripped_pruned_model)
#pruned_model.evaluate(test_X, test_y)
#stripped_pruned_model.summary()

<tensorflow.python.keras.engine.sequential.Sequential at 0x7f0e63374a00>

In [35]:
from qkeras import *
import hls4ml
from hls4ml.model.profiling import numerical, activations_keras, boxplot

import matplotlib.pyplot as plt
%matplotlib inline
plt.rcParams['figure.figsize'] = [10,10]
plt.rcParams['font.size'] = 16.0

seed = 48

np.random.seed(seed)
tf.random.set_seed(seed)

import os
import sys

# will need to clone https://github.com/kpidgeon/cms-l1-triggers for a few
# helper functions if running notebook and include path to repo here
sys.path.append('/usersc/bz18310/previous_notebook/cms-l1-triggers')

from utils.analysis import eff_rate, optimal_eff_rate
from utils.preprocessing import resize
from utils.plotting import *
from utils.hls4ml_helpers import *

plt.rc('figure', figsize=(8,6))

In [64]:
b = 5
config = {

        'QConv2D': {
          "kernel_quantizer": f'quantized_bits({b})',
          "bias_quantizer": f'quantized_bits({b})'
        },
        'QDense': {
          "kernel_quantizer": f'quantized_bits({b})',
          "bias_quantizer": f'quantized_bits({b})'
        },
        'QActivation': {'relu': f'quantized_relu({b})'}

    }

model_for_quantization = tf.keras.models.load_model('pruning_models/pruned_train_1_test.h5')
print_model_weights_sparsity(model_for_quantization)
qmodel = utils.model_quantize(model_for_quantization, config, b)
qmodel.summary()
print_model_weights_sparsity(qmodel)
compiler(qmodel)

pruning_params = {
      'pruning_schedule': tfmot.sparsity.keras.ConstantSparsity(0.5, begin_step=0, frequency=100)
  }

callbacks = [
  tfmot.sparsity.keras.UpdatePruningStep()
]

pruned_qmodel = tfmot.sparsity.keras.prune_low_magnitude(qmodel, **pruning_params)

pruned_qmodel.summary()





conv1/kernel:0: 50.00% sparsity  (18/36)
batch_normalization/gamma:0: 0.00% sparsity  (0/4)
batch_normalization/beta:0: 0.00% sparsity  (0/4)
batch_normalization/moving_mean:0: 0.00% sparsity  (0/4)
batch_normalization/moving_variance:0: 0.00% sparsity  (0/4)
conv2/kernel:0: 50.00% sparsity  (144/288)
batch_normalization_1/gamma:0: 0.00% sparsity  (0/8)
batch_normalization_1/beta:0: 0.00% sparsity  (0/8)
batch_normalization_1/moving_mean:0: 0.00% sparsity  (0/8)
batch_normalization_1/moving_variance:0: 0.00% sparsity  (0/8)
dense1/kernel:0: 50.00% sparsity  (288/576)
batch_normalization_2/gamma:0: 0.00% sparsity  (0/24)
batch_normalization_2/beta:0: 0.00% sparsity  (0/24)
batch_normalization_2/moving_mean:0: 0.00% sparsity  (0/24)
batch_normalization_2/moving_variance:0: 0.00% sparsity  (0/24)
output/kernel:0: 50.00% sparsity  (12/24)
output/bias:0: 0.00% sparsity  (0/1)
batch_normalization_3/gamma:0: 0.00% sparsity  (0/1)
batch_normalization_3/beta:0: 0.00% sparsity  (0/1)
batch_norma

In [85]:
from keras.layers.core import Flatten
from keras.layers.convolutional import *
from keras.layers.pooling import *



m = tf.keras.Sequential([
    tfmot.sparsity.keras.prune_low_magnitude(
    QConv2D(4, kernel_size = (3,3), activation='relu', kernel_quantizer=quantized_bits(5), bias_quantizer=quantized_bits(5)),
        input_shape=(20,12,1) ,**pruning_params),
    MaxPooling2D(pool_size=(2,2), padding='valid'),
    BatchNormalization(axis=1),
    tfmot.sparsity.keras.prune_low_magnitude(
    QConv2D(8, kernel_size = (3,3), activation='relu', kernel_quantizer=quantized_bits(5), bias_quantizer=quantized_bits(5)),
     **pruning_params),



])

m.summary()

Model: "sequential_30"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
prune_low_magnitude_q_conv2d (None, 18, 10, 4)         78        
_________________________________________________________________
module_wrapper_34 (ModuleWra (None, 9, 5, 4)           0         
_________________________________________________________________
batch_normalization_1 (Batch (None, 9, 5, 4)           36        
_________________________________________________________________
prune_low_magnitude_q_conv2d (None, 7, 3, 8)           586       
Total params: 700
Trainable params: 354
Non-trainable params: 346
_________________________________________________________________
