In [1]:
import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

In [2]:
import os
import numpy as np
import random
import librosa
import h5py
import tensorflow as tf
import keras
from keras.models import Model
from keras.layers import Input, Conv2D, BatchNormalization, MaxPooling2D, Flatten, Activation, Lambda
import keras.regularizers as regularizers
from keras.optimizers import Adam
from l3embedding.audio import pcm2float
from resampy import resample
import pescador
from skimage import img_as_float
from tqdm import tqdm

Using TensorFlow backend.


In [3]:
def shuffle_files(iterable):
    """Return an iterator over the items of ``iterable`` in random order.

    The input is copied into a new list first, so the caller's sequence is
    never reordered. Ordering comes from the global ``random`` module state,
    so seeding ``random`` beforehand makes the result repeatable.
    """
    shuffled = [item for item in iterable]
    random.shuffle(shuffled)
    return iter(shuffled)

def amplitude_to_db(S, amin=1e-10, dynamic_range=80.0):
    """Convert an amplitude spectrogram to decibels relative to its own peak.

    Args:
        S: Array-like of amplitudes (real or complex); only ``abs(S)`` is used.
        amin: Lower bound applied to the *power* (squared amplitude) before the
            logarithm, so that zeros do not produce ``-inf``.
        dynamic_range: Width, in dB, of the returned range; values more than
            ``dynamic_range`` below the peak are clipped.

    Returns:
        ``10 * log10(max(amin, |S|**2))`` shifted so that its maximum is 0 dB
        and clipped from below at ``-dynamic_range``.

    Raises:
        ValueError: If ``S`` is empty (the peak cannot be computed).
    """
    # Work on power explicitly instead of squaring the magnitude buffer in place.
    power = np.square(np.abs(S))

    log_spec = 10.0 * np.log10(np.maximum(amin, power))
    # Normalise to the loudest bin so the output is at most 0 dB.
    log_spec -= log_spec.max()

    return np.maximum(log_spec, -dynamic_range)

def get_melspectrogram(frame, n_fft=2048, mel_hop_length=242, samp_rate=48000, n_mels=256,
                       quant_melspec=False, fmax=None):
    """Compute a peak-normalised log-mel spectrogram for one audio frame.

    The frame goes through a Hann-windowed, centred, zero-padded STFT; the
    STFT magnitude is projected onto an HTK-style mel filter bank and the
    result is converted to decibels with ``amplitude_to_db``.

    Args:
        frame: 1-D audio signal handed to ``librosa.core.stft``.
        n_fft: FFT size, used for both the STFT and the mel filter bank.
        mel_hop_length: Hop length (in samples) of the STFT.
        samp_rate: Sampling rate passed to the mel filter bank.
        n_mels: Number of mel bands.
        quant_melspec: Accepted for interface compatibility; currently unused
            (int8 quantisation of the spectrogram is not implemented).
        fmax: Upper frequency bound for the mel filter bank (``None`` lets
            librosa choose its default).

    Returns:
        The value returned by ``amplitude_to_db`` for the mel spectrogram.
    """
    stft_options = dict(n_fft=n_fft, hop_length=mel_hop_length, window='hann',
                        center=True, pad_mode='constant')
    linear_magnitude = np.abs(librosa.core.stft(frame, **stft_options))

    mel_magnitude = librosa.feature.melspectrogram(sr=samp_rate, S=linear_magnitude,
                                                   n_fft=n_fft, n_mels=n_mels,
                                                   fmax=fmax, power=1.0, htk=True)

    return amplitude_to_db(np.array(mel_magnitude))

In [4]:
def quant_data_generator(data_dir, batch_size=512, samp_rate=8000, n_fft=2048,
                         n_mels=64, mel_hop_length=160, hop_size=0.1, fmax=None,
                         random_state=None, start_batch_idx=None, quant_input=False):
    """Return ONE batch of log-mel spectrograms drawn from shuffled ``.npz`` files.

    Despite its name this is a plain function, not a generator: it walks the
    files of ``data_dir`` in random order, accumulates rows of each file's
    ``'audio'`` array until ``batch_size`` rows are collected, converts every
    row with ``get_melspectrogram`` and returns immediately.

    Args:
        data_dir: Directory whose entries are ``.npz`` archives holding an
            ``'audio'`` array (one audio frame per row).
        batch_size: Number of frames in the returned batch.
        samp_rate, n_fft, n_mels, mel_hop_length, fmax: Forwarded to
            ``get_melspectrogram``.
        hop_size: Accepted for interface compatibility; unused here.
        random_state: If truthy, the global ``random`` module is re-seeded
            before shuffling (see note in the body).
        start_batch_idx: If given, that many complete batches are skipped
            (counted but not built) before a batch is returned.
        quant_input: Forwarded to ``get_melspectrogram`` as ``quant_melspec``.

    Returns:
        ``np.array`` of the stacked spectrograms with a trailing singleton
        axis appended, or ``None`` if the directory is exhausted before a
        full (non-skipped) batch could be assembled.
    """
    # NOTE(review): `random_state` only acts as an on/off switch -- the seed
    # itself is fixed, so every truthy value yields the same file order.
    if random_state:
        random.seed(23455)

    batch = None
    curr_batch_size = 0
    batch_idx = 0

    for fname in shuffle_files(os.listdir(data_dir)):
        print(fname)
        data_batch_path = os.path.join(data_dir, fname)

        # Read the array once and let the context manager close the archive:
        # this function returns mid-file, so a manual close at end-of-file
        # would usually never run, and each `data_blob['audio']` lookup reads
        # the array from the archive again.
        with np.load(data_batch_path) as data_blob:
            audio = data_blob['audio']

        blob_size = len(audio)
        blob_start_idx = 0

        while blob_start_idx < blob_size:
            blob_end_idx = min(blob_start_idx + batch_size - curr_batch_size, blob_size)

            # When starting from a particular batch, earlier batches are only
            # counted; their data is neither collected nor transformed.
            building = start_batch_idx is None or batch_idx >= start_batch_idx

            if building:
                chunk = audio[blob_start_idx:blob_end_idx]
                batch = chunk if batch is None else np.concatenate([batch, chunk])

            curr_batch_size += blob_end_idx - blob_start_idx
            blob_start_idx = blob_end_idx

            if curr_batch_size == batch_size:
                if building:
                    # Saved audio is already floating point, so no PCM
                    # conversion is applied before the spectrogram.
                    X = [get_melspectrogram(batch[i].flatten(), n_fft=n_fft,
                                            mel_hop_length=mel_hop_length,
                                            samp_rate=samp_rate, n_mels=n_mels,
                                            quant_melspec=quant_input, fmax=fmax)
                         for i in range(batch_size)]
                    return np.array(X)[:, :, :, np.newaxis]

                batch_idx += 1
                curr_batch_size = 0
                batch = None

    return None

def single_epoch_test_data_generator(file_list, quant_input=True, batch_size=64, samp_rate=8000, fmax=None,
                                     n_fft=1024, n_mels=64, mel_hop_length=160, start_batch_idx=None):
    """Yield fixed-size batches of log-mel spectrograms, visiting each file once.

    Rows of every file's ``'audio'`` array are accumulated, in the order given
    by ``file_list``, until ``batch_size`` rows are available; each row is then
    converted with ``get_melspectrogram`` and the stacked result is yielded.
    Batches may span file boundaries. Rows left over after the last file that
    do not fill a complete batch are never yielded.

    Args:
        file_list: Iterable of paths to ``.npz`` archives holding an
            ``'audio'`` array (one audio frame per row).
        quant_input: Forwarded to ``get_melspectrogram`` as ``quant_melspec``.
        batch_size: Number of frames per yielded batch.
        samp_rate, fmax, n_fft, n_mels, mel_hop_length: Forwarded to
            ``get_melspectrogram``.
        start_batch_idx: If given, that many leading batches are skipped
            (counted but neither built nor yielded).

    Yields:
        ``np.array`` of the stacked spectrograms with a trailing singleton
        axis appended.
    """
    batch = None
    curr_batch_size = 0
    batch_idx = 0

    for data_batch_path in file_list:
        # Read the array once and close the archive right away: the handle is
        # then released even if the consumer abandons the generator early, and
        # the array is not re-read from disk for every slice.
        with np.load(data_batch_path) as data_blob:
            audio = data_blob['audio']

        blob_size = len(audio)
        blob_start_idx = 0

        while blob_start_idx < blob_size:
            blob_end_idx = min(blob_start_idx + batch_size - curr_batch_size, blob_size)

            # When starting from a particular batch, earlier batches are only
            # counted; their data is neither collected nor transformed.
            building = start_batch_idx is None or batch_idx >= start_batch_idx

            if building:
                chunk = audio[blob_start_idx:blob_end_idx]
                batch = chunk if batch is None else np.concatenate([batch, chunk])

            curr_batch_size += blob_end_idx - blob_start_idx
            blob_start_idx = blob_end_idx

            if curr_batch_size == batch_size:
                if building:
                    X = [get_melspectrogram(batch[i].flatten(), n_fft=n_fft,
                                            mel_hop_length=mel_hop_length,
                                            samp_rate=samp_rate, n_mels=n_mels,
                                            quant_melspec=quant_input, fmax=fmax)
                         for i in range(batch_size)]
                    yield np.array(X)[:, :, :, np.newaxis]

                batch_idx += 1
                curr_batch_size = 0
                batch = None

In [5]:
def quantize_keras_to_tflite(tflite_model_file, keras_model_path, quant_mode='default', quantized_input=False,
                             n_mels=256, n_hop=242, n_dft=2048, asr=48000, halved_convs=False,
                             calibrate_data_dir=None, num_calibration_steps=1024):
    """Convert a saved Keras model to a post-training-quantized TFLite file.

    Args:
        tflite_model_file: Destination path of the ``.tflite`` flatbuffer.
        keras_model_path: Path of the Keras model file to convert.
        quant_mode: ``'default'`` uses ``tf.lite.Optimize.DEFAULT`` together
            with a representative dataset for calibration; ``'size'`` uses
            ``tf.lite.Optimize.OPTIMIZE_FOR_SIZE`` without calibration data.
        quantized_input: In ``'default'`` mode, request an int8 inference
            output type. The inference input type is left at the converter's
            default.
        n_mels, n_hop, n_dft, asr: Spectrogram settings used to build the
            calibration samples.
        halved_convs: Accepted for interface compatibility; unused here.
        calibrate_data_dir: Directory of calibration ``.npz`` files; required
            for ``'default'`` mode.
        num_calibration_steps: Number of calibration samples to produce.

    Raises:
        ValueError: If ``quant_mode`` is not recognised, or if ``'default'``
            mode is requested without ``calibrate_data_dir``.
    """

    def representative_dataset_gen():
        """Yield one single-frame float32 sample per calibration step."""
        print('Calibrating.........')
        for _step in range(num_calibration_steps):
            sample = quant_data_generator(calibrate_data_dir, batch_size=1,
                                          samp_rate=asr, n_fft=n_dft, n_mels=n_mels,
                                          mel_hop_length=n_hop)
            yield [np.array(sample).astype(np.float32)]

    converter = tf.lite.TFLiteConverter.from_keras_model_file(keras_model_path)

    if quant_mode not in ('default', 'size'):
        raise ValueError('Unrecognized Quantization mode!')

    if quant_mode == 'size':
        converter.post_training_quantize = True
        converter.optimizations = [tf.lite.Optimize.OPTIMIZE_FOR_SIZE]
    else:
        # Activation ranges are calibrated from real data, so a data
        # directory is mandatory in this mode.
        if calibrate_data_dir is None:
            raise ValueError('Quantized activation calibration needs data directory!')

        converter.optimizations = [tf.lite.Optimize.DEFAULT]
        if quantized_input:
            converter.inference_output_type = tf.int8
        converter.representative_dataset = representative_dataset_gen

    flatbuffer = converter.convert()
    with open(tflite_model_file, "wb") as out_fh:
        out_fh.write(flatbuffer)
    print('Tflite model saved in:', tflite_model_file)

In [6]:
def post_training_quantization(model_path, calibrate_data_dir, quant_mode='default', quantized_input=False,
                               n_mels=256, n_hop=242, n_dft=2048, asr=48000, halved_convs=False,
                               flatten=False, calibration_steps=1024,
                               output_root='/scratch/sk7898/quantization/'):
    """Quantize a saved Keras model and store the TFLite result under ``output_root``.

    The output file is
    ``<output_root>/<model name>/full_quantized_<quant_mode><input tag>.tflite``
    where ``<model name>`` is the base name of ``model_path`` without a
    trailing ``.h5`` and ``<input tag>`` is ``_int8Ip`` or ``_float32``
    depending on ``quantized_input``.

    Args:
        model_path: Path of the Keras model file to quantize.
        calibrate_data_dir: Directory with calibration data (needed by the
            ``'default'`` mode of ``quantize_keras_to_tflite``).
        quant_mode: ``'default'`` (calibrated quantization using a
            representative dataset) or ``'size'`` (no calibration data).
        quantized_input: Forwarded to ``quantize_keras_to_tflite``; also
            selects the input tag in the file name.
        n_mels, n_hop, n_dft, asr: Spectrogram settings used for calibration.
        halved_convs: Forwarded to ``quantize_keras_to_tflite``.
        flatten: Accepted for interface compatibility; unused here.
        calibration_steps: Number of calibration samples.
        output_root: Directory under which the per-model output directory is
            created.
    """
    # 1. Derive the per-model output directory. Only a literal '.h5' suffix
    #    is removed; str.strip('.h5') would instead eat any leading/trailing
    #    '.', 'h' or '5' characters of the name.
    model_name = os.path.basename(model_path)
    if model_name.endswith('.h5'):
        model_name = model_name[:-len('.h5')]
    dir_prefix = os.path.join(output_root, model_name)

    # exist_ok avoids a race between checking for and creating the directory.
    os.makedirs(dir_prefix, exist_ok=True)

    # 2.1 Convert the Keras model to a TFLite model (the converter reads the
    #     model straight from `model_path`, so it is not loaded here).
    # 2.2 Quantize it: 'default' covers weights and activations, 'size'
    #     covers the weights only.
    # 2.3 Save the quantized TFLite model.
    print('Quantizing keras model and saving as tflite')
    input_type = '_int8Ip' if quantized_input else '_float32'
    tflite_model_file = os.path.join(dir_prefix, 'full_quantized_' + quant_mode + input_type + '.tflite')

    quantize_keras_to_tflite(tflite_model_file, model_path, quant_mode=quant_mode,
                             quantized_input=quantized_input, asr=asr,
                             n_mels=n_mels, n_hop=n_hop, n_dft=n_dft, halved_convs=halved_convs,
                             calibrate_data_dir=calibrate_data_dir, num_calibration_steps=calibration_steps)

**Quantize both the weights and the activations of the model**\
If the input is already in int8, set `quantized_input = True`.\
If TFLite should convert the float32 input to int8 by adding a Quantize layer, set `quantized_input = False`.

In [7]:
# Other models this cell has been pointed at:
#model_path = '/scratch/sk7898/l3pruning/embedding/fixed/reduced_input/l3_audio_original_48000_256_242_2048.h5'
#model_path = '/scratch/dr2915/l3pruning/embedding/fixed/reduced_input/l3_audio_20191108201753_8000_64_160_1024_half.h5'
model_path = '/scratch/dr2915/Nathan/pipeline.h5'

# Calibration data and how many samples to draw from it.
calibrate_data_dir = '/beegfs/dr2915/sonyc_ust/frames/8KHz'
calibration_steps = 32

# Spectrogram front-end settings used to build the calibration samples.
asr = 8000
n_dft = 1024
n_hop = 160
n_mels = 64

# Quantization options.
quant_mode = 'default'
quantized_input = False
flatten = True
# Models with halved convolutions carry 'half' in their file name.
halved_convs = 'half' in model_path

post_training_quantization(
    model_path, calibrate_data_dir,
    quant_mode=quant_mode, quantized_input=quantized_input,
    n_mels=n_mels, n_hop=n_hop, n_dft=n_dft, asr=asr,
    halved_convs=halved_convs, flatten=flatten,
    calibration_steps=calibration_steps,
)


Quantizing keras model and saving as tflite
Instructions for updating:
`normal` is a deprecated alias for `truncated_normal`




Instructions for updating:
Use `tf.compat.v1.graph_util.convert_variables_to_constants`
Instructions for updating:
Use `tf.compat.v1.graph_util.extract_sub_graph`
INFO:tensorflow:Froze 54 variables.
INFO:tensorflow:Converted 54 variables to const ops.
Calibrating.........
00_002872.npz
11_000501.npz
27_000831.npz
13_001380.npz
40_000687.npz
40_001575.npz
32_001624.npz
40_000428.npz
02_002355.npz
38_002951.npz
27_001961.npz
38_000322.npz
23_001430.npz
40_001131.npz
31_000861.npz
05_010416.npz
41_002444.npz
40_000153.npz
27_003242.npz
04_000128.npz
33_002663.npz
32_002908.npz
04_001123.npz
37_001644.npz
29_001369.npz
02_000256.npz
06_001327.npz
40_000525.npz
10_002599.npz
40_001089.npz
40_001437.npz
23_002593.npz
Tflite model saved in: /scratch/sk7898/quantization/pipeline/full_quantized_default_float32.tflite


**Input/Output of tflite model (Interpreter)**

In [8]:
# Location of the quantized model to inspect.
output_path = '/scratch/sk7898/quantization'
quant_model = 'pipeline/full_quantized_default_float32.tflite'
quant_output_path = os.path.join(output_path, quant_model)

interpreter = tf.lite.Interpreter(model_path=str(quant_output_path))
input_details, output_details = interpreter.get_input_details(), interpreter.get_output_details()

# Only the first input/output tensor is looked at; the shapes are kept
# without their first dimension.
first_input, first_output = input_details[0], output_details[0]
input_shape, input_index = first_input['shape'][1:], first_input['index']
output_shape, output_index = first_output['shape'][1:], first_output['index']

interpreter.allocate_tensors()

# Details are queried again after allocation so the printout reflects the
# allocated tensors.
print("== Input details ==")
print(interpreter.get_input_details()[0])
print("type:", first_input['dtype'])
print("\n== Output details ==")
print(interpreter.get_output_details()[0])

== Input details ==
{'name': 'input_1', 'index': 45, 'shape': array([ 1, 64, 51,  1], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0)}
type: <class 'numpy.float32'>

== Output details ==
{'name': 'urban_sound_classifier/output/Sigmoid', 'index': 44, 'shape': array([1, 8], dtype=int32), 'dtype': <class 'numpy.int8'>, 'quantization': (0.00390625, -128)}


**Quantize only the weights of the model**

In [None]:
# Same model and settings as before; only the quantization mode changes.
quant_mode = 'size'
post_training_quantization(
    model_path, calibrate_data_dir,
    quant_mode=quant_mode, quantized_input=quantized_input,
    n_mels=n_mels, n_hop=n_hop, n_dft=n_dft, asr=asr,
    halved_convs=halved_convs, flatten=flatten,
    calibration_steps=calibration_steps,
)

**Generate class predictions from the tflite model**

In [11]:
def get_softmax_batch_from_tflite(data_gen, tflite_model_file, batch_size, classes=8):
    """Run a TFLite model over every batch of ``data_gen`` and collect the outputs.

    Args:
        data_gen: Iterable of input batches; each batch is cast to float32 and
            must match the model input shape with ``batch_size`` as its first
            dimension.
        tflite_model_file: Path of the ``.tflite`` model.
        batch_size: Number of samples per batch; the model's input tensor is
            resized to this size before allocation.
        classes: Accepted for interface compatibility; unused here.

    Returns:
        List with one output array per batch, exactly as returned by
        ``interpreter.get_tensor`` (no de-quantization is applied).
    """
    interpreter = tf.lite.Interpreter(model_path=str(tflite_model_file))
    input_details = interpreter.get_input_details()
    output_details = interpreter.get_output_details()

    input_shape = input_details[0]['shape'][1:]
    input_index = input_details[0]['index']
    output_index = output_details[0]['index']

    # Only the input tensor is resized. The output tensor must not be passed
    # to resize_tensor_input (and certainly not with the input's shape); its
    # shape follows from the resized input when tensors are allocated.
    interpreter.resize_tensor_input(input_index, (batch_size,) + tuple(input_shape))
    interpreter.allocate_tensors()

    print("== Input details ==")
    print(interpreter.get_input_details()[0])
    print("type:", input_details[0]['dtype'])
    print("\n== Output details ==")
    print(interpreter.get_output_details()[0])

    # Predictions per batch.
    predictions = []
    for batch_x in data_gen:
        x = np.array(batch_x).astype(np.float32)
        interpreter.set_tensor(input_index, x)
        interpreter.invoke()
        predictions.append(interpreter.get_tensor(output_index))

    return predictions

In [10]:
def gen_softmax(tflite_model_file, file_list, samp_rate=8000,
                n_mels=64, emb_len=256, mel_hop_length=160,
                n_fft=1024, batch_size=64):
    """Run the quantized TFLite model over the frames stored in ``file_list``.

    Spectrogram batches are produced by ``single_epoch_test_data_generator``
    and fed to ``get_softmax_batch_from_tflite``.

    Args:
        tflite_model_file: Path of the ``.tflite`` model.
        file_list: Paths of the ``.npz`` files to run inference on.
        samp_rate, n_mels, mel_hop_length, n_fft: Spectrogram settings.
        emb_len: Accepted for interface compatibility; unused here.
        batch_size: Number of frames per inference batch.

    Returns:
        The list of per-batch outputs returned by
        ``get_softmax_batch_from_tflite``.
    """
    print('Getting softmax output for downstream classes out of Quantized tflite model')

    n_classes = 8
    spectrogram_batches = single_epoch_test_data_generator(
        file_list, batch_size=batch_size, samp_rate=samp_rate,
        n_fft=n_fft, n_mels=n_mels, mel_hop_length=mel_hop_length)

    return get_softmax_batch_from_tflite(spectrogram_batches, tflite_model_file,
                                         batch_size, classes=n_classes)

In [12]:
def get_test_files(data_dir, num_files=10):
    """Pick a reproducible random subset of the files in ``data_dir``.

    Args:
        data_dir: Directory to sample from.
        num_files: Maximum number of paths to return.

    Returns:
        List of at most ``num_files`` paths (``data_dir`` joined with the
        entry name); empty when ``num_files`` is not positive or the
        directory is empty.

    Note:
        The global ``random`` module is re-seeded with a fixed value as a
        side effect.
    """
    if num_files <= 0:
        return []

    random.seed(23455)

    # os.listdir returns entries in arbitrary order; sorting first makes the
    # seeded shuffle select the same files on every run and machine.
    names = sorted(os.listdir(data_dir))
    random.shuffle(names)

    return [os.path.join(data_dir, name) for name in names[:num_files]]

In [16]:
# Model under test and the cache file holding the shortlisted audio files.
tflite_model_file = '/scratch/sk7898/quantization/pipeline/full_quantized_default_float32.tflite'
out_file = 'selected_audio_files.npz'

# Draw the shortlist only once; later runs reuse the cached selection.
if not os.path.exists(out_file):
    test_data_dir = '/beegfs/dr2915/sonyc_ust/frames/8KHz'
    shortlist_files = get_test_files(test_data_dir)
    np.savez(out_file, x=shortlist_files)

In [18]:
# Read the shortlist back from the cache file; the context manager closes
# the archive once the array has been extracted.
with np.load(out_file) as files:
    shortlist_files = files['x']
print(shortlist_files)

['/beegfs/dr2915/sonyc_ust/frames/8KHz/01_001297.npz'
 '/beegfs/dr2915/sonyc_ust/frames/8KHz/27_002896.npz'
 '/beegfs/dr2915/sonyc_ust/frames/8KHz/04_002755.npz'
 '/beegfs/dr2915/sonyc_ust/frames/8KHz/03_000965.npz'
 '/beegfs/dr2915/sonyc_ust/frames/8KHz/16_010692.npz'
 '/beegfs/dr2915/sonyc_ust/frames/8KHz/08_001117.npz'
 '/beegfs/dr2915/sonyc_ust/frames/8KHz/25_010315.npz'
 '/beegfs/dr2915/sonyc_ust/frames/8KHz/06_000069.npz'
 '/beegfs/dr2915/sonyc_ust/frames/8KHz/18_001657.npz'
 '/beegfs/dr2915/sonyc_ust/frames/8KHz/06_002320.npz']


In [19]:
# Run the quantized model over the shortlisted files; `output` is a list
# with one array of model outputs per batch of 64 frames.
output = gen_softmax(tflite_model_file, shortlist_files, batch_size=64)

Getting softmax output for downstream classes out of Quantized tflite model
== Input details ==
{'name': 'input_1', 'index': 45, 'shape': array([64, 64, 51,  1], dtype=int32), 'dtype': <class 'numpy.float32'>, 'quantization': (0.0, 0)}
type: <class 'numpy.float32'>

== Output details ==
{'name': 'urban_sound_classifier/output/Sigmoid', 'index': 44, 'shape': array([64,  8], dtype=int32), 'dtype': <class 'numpy.int8'>, 'quantization': (0.00390625, -128)}


In [None]:
#output_file = 'selected_audio_predictions_fp.npz'
#np.savez(output_file, y=np.array(output))

In [21]:
# Merge the per-batch outputs into one row per frame (8 class scores each)
# and keep the index of the highest-scoring class.
pred = np.reshape(np.array(output), (-1, 8))
pred_max = pred.argmax(axis=1)

In [22]:
# Show the predicted class index of every frame.
print(pred_max)

[6 6 6 6 6 7 7 7 7 7 7 7 7 7 7 7 6 7 6 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7 7
 7 7 7 7 7 7 7 7 7 7 7 7 7 7 6 7 7 7 7 7 7 7 7 7 7 6 6 6 7 6 6 6 6 7 7 7 6
 6 6 6 6 6 6 6 6 6 6 6 6 7 7 7 7 7 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 6 6 6
 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 6 6 6 6
 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6
 6 6 6 6 6 6 6 6 6 6 6 6 6 6 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 6 6 6 6 4 4
 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 7 6 6 6 6 6 6 6 6 6 6 6 6 6
 6 6 6 6 6 6 6 6 6 4 6 6 0 6 6 2 2 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 6 0
 6 6 6 6 6 6 6 6 6 6 6 6 6 0 6 4 4 0 0 6 0 0 6 6 6 0 6 6 0 0 0 0 0 0 0 0 0
 0 0 0 0 0 0 4 0 0 0 0 0 

In [23]:
# Persist the per-frame class indices under key 'y'.
pred_file = 'audio_class_pred_fp.npz'
np.savez(pred_file, y=pred_max)