### Full integer quantization of weights and activations into 8-bit integer
---

#### Load validation data and convert

In [102]:
import data as datapy

val_data_path = "/home/jovyan/cut-data/validation/"

# load validation as representative data for quantization
print('Loading validation data...', flush=True)
x_val, y_val, labels, file_paths_val = datapy.loadData(val_data_path)
print('...Done. Loaded {} validation samples and {} labels.'.format(x_val.shape[0], y_val.shape[1]), flush=True)

2023-08-07 15:54:30.845349: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-07 15:54:30.889253: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-08-07 15:54:30.890194: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Loading validation data...
Processed 0 files. Currently processing file: /home/jovyan/cut-data/validation/non_target/0.920_0001_270097.wav
Processed 100 files. Currently processing file: /home/jovyan/cut-data/validation/non_target/0.930_0002_182583971.wav
Processed 200 files. Currently processing file: /home/jovyan/cut-data/validation/non_target/0.940_0004_534761.wav
Processed 300 files. Currently processing file: /home/jovyan/cut-data/validation/non_target/0.950_0018_226391901.wav
Processed 400 files. Currently processing file: /home/jovyan/cut-data/validation/non_target/0.970_0017_647758.wav
Processed 500 files. Currently processing file: /home/jovyan/cut-data/validation/non_target/1.000_0001_0H2uMhzSitY_520.wav
Processed 600 files. Currently processing file: /home/jovyan/cut-data/validation/non_target/1.000_0002_--ivFZu-hlc_30.wav
Processed 700 files. Currently processing file: /home/jovyan/cut-data/validation/non_target/1.000_0002_2RpOd9MJjyQ_10.wav
Processed 800 files. Currently p

### Load dataset generator

#### Balance the validation data

In [104]:
# balance the validation data:
print('Balance the validation data...')

y_val_indices = np.argmax(y_val, axis=1)

# minimum of one class
min_samples = min(np.bincount(y_val_indices))

# reduce entries until minimum after shuffle 
balanced_x_val = []
balanced_y_val = []
balanced_file_paths_val = []
for label in np.unique(y_val_indices):
    indices = np.where(y_val_indices == label)[0]
    np.random.shuffle(indices)  # Random order for random removal of samples
    indices = indices[:min_samples]
    balanced_x_val.append(x_val[indices])
    balanced_y_val.append(y_val[indices])
    balanced_file_paths_val.extend(file_paths_val[indices])

# Combine the balanced data for all classes
balanced_x_val = np.concatenate(balanced_x_val, axis=0)
balanced_y_val = np.concatenate(balanced_y_val, axis=0)
balanced_file_paths_val = np.array(balanced_file_paths_val)

print('Balanced validation data:')
print('balanced_x_val shape:', balanced_x_val.shape)
print('balanced_y_val shape:', balanced_y_val.shape)
print('balanced_file_paths_val shape:', balanced_file_paths_val.shape)

print('...Done. Loaded {} validation samples and {} labels.'.format(balanced_x_val.shape[0], balanced_y_val.shape[0]), flush=True)

Balance the validation data...
Balanced validation data:
balanced_x_val shape: (944,)
balanced_y_val shape: (944, 2)
balanced_file_paths_val shape: (944,)
...Done. Loaded 944 validation samples and 944 labels.


In [129]:
# Convert to numpy arrays
import numpy as np

x_val = np.array(balanced_x_val, dtype='float32')
y_val = np.array(balanced_y_val, dtype='float32')

In [29]:
import model as modelpy

batch_size = 12



val_gen = modelpy.AudioDataGenerator(balanced_file_paths_val, balanced_y_val, batch_size=batch_size)

##### Load Keras Model

In [97]:
import tensorflow as tf
import keras

# baseline keras file which still has the activation layer in the end
keras_baselineModel_activation_path = "/home/jovyan/models/checkpoints_/baseline_two_class_model_activation/"
keras_baselineModel_activation = keras.models.load_model(keras_baselineModel_activation_path)

print("Finished loading keras model")

keras_baselineModel_activation.summary()


Finished loading keras model
Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 INPUT (InputLayer)          [(None, 144000)]             0         []                            
                                                                                                  
 ADVANCED_SPEC1 (LinearSpec  (None, 128, 513, 1)          1         ['INPUT[0][0]']               
 Layer)                                                                                           
                                                                                                  
 BNORM_SPEC_NOQUANT (BatchN  (None, 128, 513, 1)          4         ['ADVANCED_SPEC1[0][0]']      
 ormalization)                                                                                    
                                                                 

### Quantization

In [35]:
def representative_data_gen():
    print("Shape of val_gen:", len(val_gen))
    for batch in val_gen:
        print("########NEW BATCH#######")
        print("Shape of batch:", len(batch))
        for input_value in batch:
            print("########NEW ENTRY######")
            print("Shape of input_value:", len(input_value))
            yield [input_value]
            break
        break

converter = tf.lite.TFLiteConverter.from_keras_model(keras_baselineModel_activation)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_data_gen
# Ensure that if any ops can't be quantized, the converter throws an error
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]

print('Starting conversion using Quantization...', flush=True)
baseline_activation_INT8_tflite = converter.convert()
print('...Done.', flush=True)

Starting conversion using Quantization...
INFO:tensorflow:Assets written to: /tmp/tmpc0xacblc/assets


INFO:tensorflow:Assets written to: /tmp/tmpc0xacblc/assets
2023-08-07 17:16:01.679491: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.


Shape of val_gen: 79
########NEW BATCH#######
Shape of batch: 2
########NEW ENTRY######
Shape of input_value: 12


2023-08-07 17:16:01.679551: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2023-08-07 17:16:01.679891: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpc0xacblc
2023-08-07 17:16:01.704859: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2023-08-07 17:16:01.704904: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: /tmp/tmpc0xacblc
2023-08-07 17:16:01.786257: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2023-08-07 17:16:02.173411: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /tmp/tmpc0xacblc
2023-08-07 17:16:02.323912: I tensorflow/cc/saved_model/loader.cc:314] SavedModel load for tags { serve }; Status: success: OK. Took 644020 microseconds.
fully_quantize: 0, inference_type: 6, input_inference_type: FLOAT32, output_inference_type: FLOAT32


...Done.


#### Evaluate

In [39]:
interpreter = tf.lite.Interpreter(model_content=baseline_activation_INT8_tflite)
interpreter.allocate_tensors()

tflite_size = len(baseline_activation_INT8_tflite) / (1024 * 1024)
print(f"Size of the TFLite model: {tflite_size:.2f} MB")

Size of the TFLite model: 6.99 MB


##### Save

In [40]:
# Save the quantized model:
tflite_model_INT8_path = "/home/jovyan/models/checkpoints_/baseline_activation_INT8.tflite"
with open(tflite_model_INT8_path, "wb") as f:
    f.write(baseline_activation_INT8_tflite)
print("Saved!")

Saved!


In [98]:
tf.lite.experimental.Analyzer.analyze(model_path=tflite_model_INT8_path,
                                      model_content=None,
                                      gpu_compatibility=False)

In [41]:
%load_ext autoreload
%autoreload 2

import evaluateTFliteModel

test_data_path = "/home/jovyan/cut-data/testing/"
batch_size = 12

evaluateTFliteModel.evaluate_tflite_model(tflite_model_INT8_path, test_data_path, batch_size)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Processed 0 files. Currently processing file: /home/jovyan/cut-data/testing/non_target/0.920_0001_703520.wav
Processed 100 files. Currently processing file: /home/jovyan/cut-data/testing/non_target/0.920_0057_645986.wav
Processed 200 files. Currently processing file: /home/jovyan/cut-data/testing/non_target/0.930_0263_741788.wav
Processed 300 files. Currently processing file: /home/jovyan/cut-data/testing/non_target/0.950_0002_645965.wav
Processed 400 files. Currently processing file: /home/jovyan/cut-data/testing/non_target/0.960_0024_103739801.wav
Processed 500 files. Currently processing file: /home/jovyan/cut-data/testing/non_target/1.000_0001_-X5Ay0Wuew0_20.wav
Processed 600 files. Currently processing file: /home/jovyan/cut-data/testing/non_target/1.000_0001_4TQzd0lB8IQ_30.wav
Processed 700 files. Currently processing file: /home/jovyan/cut-data/testing/non_target/1.000_0002_1MF9_29YUZU_10.wav