In [None]:
import os

import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow.keras import Model, layers
import tensorflow_io as tfio
import tensorflow_model_optimization as tfmot
from tensorflow_model_optimization.python.core.quantization.keras import quantize_layer
from tensorflow_model_optimization.python.core.quantization.keras import quantizers
from tensorflow_model_optimization.python.core.quantization.keras.graph_transformations import transforms
# Short alias for the node type exposed by the tfmot `transforms` module
# imported above.
LayerNode = transforms.LayerNode

# Project-local modules.
import features as features_lib
import params
# NOTE: this rebinds the name `params` from the imported module to a `Params`
# instance; every later `params.<attr>` lookup reads from that instance.
params = params.Params()

In [None]:
# layers
def _batch_norm(name, params):
    def _bn_layer(layer_input):
        return layers.BatchNormalization(
            name=name,
            center=params.batchnorm_center,
            scale=params.batchnorm_scale,
            epsilon=params.batchnorm_epsilon,
        )(layer_input)

    return _bn_layer


def _conv(name, kernel, stride, filters, params):
    def _conv_layer(layer_input):
        output = layers.Conv2D(
            name="{}/conv".format(name),
            filters=filters,
            kernel_size=kernel,
            strides=stride,
            padding=params.conv_padding,
            use_bias=False,
            activation=None,
        )(layer_input)
        output = _batch_norm("{}/conv/bn".format(name), params)(output)
        # output = layers.ReLU(name="{}/relu".format(name))(output)
        output = tf.nn.relu6(output, "{}/relu6".format(name))
        return output

    return _conv_layer


def _separable_conv(name, kernel, stride, filters, params):
    def _separable_conv_layer(layer_input):
        output = layers.DepthwiseConv2D(
            name="{}/depthwise_conv".format(name),
            kernel_size=kernel,
            strides=stride,
            depth_multiplier=1,
            padding=params.conv_padding,
            use_bias=False,
            activation=None,
        )(layer_input)
        output = _batch_norm("{}/depthwise_conv/bn".format(name), params)(output)
        output = tf.nn.relu6(output, "{}/depthwise_conv/relu6".format(name))
        output = layers.Conv2D(
            name="{}/pointwise_conv".format(name),
            filters=filters,
            kernel_size=(1, 1),
            strides=1,
            padding=params.conv_padding,
            use_bias=False,
            activation=None,
        )(output)
        output = _batch_norm("{}/pointwise_conv/bn".format(name), params)(output)
        output = tf.nn.relu6(output, "{}/pointwise_conv/relu6".format(name))
        return output

    return _separable_conv_layer

In [None]:
# Convolutional stack, applied in order. Each entry is
# (layer_function, kernel, stride, num_filters): one plain convolution
# followed by thirteen depthwise-separable stages.
_YAMNET_LAYER_DEFS = [(_conv, [3, 3], 2, 32)] + [
    (_separable_conv, [3, 3], stride, num_filters)
    for stride, num_filters in (
        (1, 64),
        (2, 128),
        (1, 128),
        (2, 256),
        (1, 256),
        (2, 512),
        (1, 512),
        (1, 512),
        (1, 512),
        (1, 512),
        (1, 512),
        (2, 1024),
        (1, 1024),
    )
]

In [None]:
# model definition
# Input: one fixed-length 1-D waveform of `params.min_num_samples` samples.
# `batch_shape` gives the complete tensor shape, so there is no extra batch axis.
waveform = layers.Input(
    batch_shape=(params.min_num_samples,),
    dtype=tf.float32,
    name="waveform_binary 0"
)

# magnitude spectrogram
window_length_samples = int(round(params.sample_rate * params.stft_window_seconds))
hop_length_samples = int(round(params.sample_rate * params.stft_hop_seconds))
# Smallest power of two >= window_length_samples, computed with integer
# arithmetic so float rounding in log() cannot push the exponent up by one.
fft_length = 1 << (window_length_samples - 1).bit_length()
# Slice the waveform into overlapping frames: (num_frames, window_length_samples).
framed_signal = tf.signal.frame(waveform, window_length_samples, hop_length_samples)
# Periodic Hann window with exactly `window_length_samples` taps.
# An integer arange divided by the length is used instead of
# np.arange(0, 1.0, 1.0 / n): with a float step the element count depends on
# rounding and can come out as n + 1, which would break the reshape below.
hann_window = tf.reshape(
    tf.constant(
        (0.5 - 0.5 * np.cos(2 * np.pi * np.arange(window_length_samples) / window_length_samples)).astype(np.float32),
        name='hann_window'
    ),
    [1, window_length_samples]
)
# Broadcast the (1, window) window across all frames.
windowed_signal = framed_signal * hann_window

# rfft
# Zero-pad every windowed frame from `window_length_samples` up to
# `fft_length`, splitting the padding before/after the frame, then take the
# real FFT and its magnitude.
# NOTE(review): every TF op applied to a Keras tensor here appears to become its
# own layer in the final model (see the TFOpLambda entries in the layer
# listing), and a later cell selects layers by index (`i >= 18`). Adding,
# removing or reordering ops in this section shifts those indices.
signal_frame_length = tf.shape(windowed_signal)[-1]
half_pad = (fft_length - signal_frame_length) // 2
padded_windowed_signal = tf.pad(
    windowed_signal,
    [
        # Don't add any padding in the frame dimension.
        [0, 0],
        # Pad before and after the signal within each frame.
        [half_pad, fft_length - signal_frame_length - half_pad]
    ],
    mode='CONSTANT',
    constant_values=0.0
)
# Insert a singleton middle axis -> (num_frames, 1, fft_length) so the 2-D
# real FFT below can be applied with an FFT size of [1, fft_length].
reshaped_padded_windowed_signal = tf.reshape(
    padded_windowed_signal,
    [
        padded_windowed_signal.shape[0],
        1,
        padded_windowed_signal.shape[1],
    ]
)
# Earlier direct call, kept for reference; the FFT is wrapped in a Lambda layer instead.
# rdft = tf.signal.rfft2d(reshaped_padded_windowed_signal, [1,fft_length])
rdft = tf.keras.layers.Lambda(lambda x: tf.signal.rfft2d(x, [1,fft_length]))(reshaped_padded_windowed_signal)

# possibly a reshape in here
# Drop the singleton middle axis again: keep axis 0 (frames) and axis 2 (FFT bins).
reshaped_rdft = tf.reshape(
    rdft,
    [
        rdft.shape[0],
        rdft.shape[2],
    ]
)
# Magnitude of the complex spectrum.
complex_abs = tf.math.abs(reshaped_rdft)

# Earlier helper-based implementation of the same step, kept for reference.
# magnitude_spectrogram = _tflite_stft_magnitude(
#     signal=waveform,
#     frame_length=window_length_samples,
#     frame_step=hop_length_samples,
#     fft_length=fft_length
# )


# linear mel weight matrix
# A real FFT of size `fft_length` yields fft_length // 2 + 1 frequency bins.
num_spectrogram_bins = fft_length // 2 + 1
# Projection matrix from linear-frequency bins to `params.mel_bands` mel bands
# covering [params.mel_min_hz, params.mel_max_hz].
linear_to_mel_weight_matrix = tf.signal.linear_to_mel_weight_matrix(
    num_mel_bins=params.mel_bands,
    num_spectrogram_bins=num_spectrogram_bins,
    sample_rate=params.sample_rate,
    lower_edge_hertz=params.mel_min_hz,
    upper_edge_hertz=params.mel_max_hz
)

# mel spectrogram
# Matrix product of the magnitude spectrogram with the mel weight matrix.
mel_spectrogram = tf.matmul(
    complex_abs,
    linear_to_mel_weight_matrix
)


# log mel spectrogram
# `params.log_offset` is added before the logarithm so a zero magnitude does
# not produce log(0).
log_mel_spectrogram = tf.math.log(mel_spectrogram + params.log_offset)

# features
# The patch-framing code below is disabled; the whole log-mel spectrogram is
# fed to the network as a single patch.
# spectrogram_hop_length_samples = int(round(params.sample_rate * params.stft_hop_seconds))
# spectrogram_sample_rate = params.sample_rate / spectrogram_hop_length_samples
# patch_window_length_samples = int(round(spectrogram_sample_rate * params.patch_window_seconds))
# patch_hop_length_samples = int(round(spectrogram_sample_rate * params.patch_hop_seconds))
# # features = tf.signal.frame(
# #     signal=log_mel_spectrogram,
# #     frame_length=patch_window_length_samples,
# #     frame_step=patch_hop_length_samples,
# #     axis=0
# # )
# Add a leading axis of size 1: (frames, bands) -> (1, frames, bands).
reshaped_lms = tf.reshape(
    log_mel_spectrogram,
    [
        1,
        log_mel_spectrogram.shape[0],
        log_mel_spectrogram.shape[1],
    ]
)
# Disabled quantization experiments, kept for reference.
# quant_rlms = tf.quantization.quantize(
#     reshaped_lms,
#     -5400,
#     5400,
#     tf.quint16,
#     name='quant_rlms'
# )[0]
# def replacement(match_layer):
#     quant_layer = quantize_layer.QuantizeLayer(
#         quantizers.AllValuesQuantizer(
#             num_bits=8, per_axis=False, symmetric=False, narrow_range=False))
#     layer_config = tf.keras.layers.serialize(quant_layer)
#     layer_config['name'] = quant_layer.name

#     quant_layer_node = LayerNode(
#         layer_config,
#         input_layers=[match_layer])

#     return quant_layer_node

# quant_rlms = replacement(reshaped_lms)
# With the experiments above disabled, `quant_rlms` is just an alias.
quant_rlms = reshaped_lms
# Add a trailing channel axis of size 1: (1, frames, bands) -> (1, frames, bands, 1).
reshaped_qrlms = tf.reshape(
    quant_rlms,
    [
        quant_rlms.shape[0],
        quant_rlms.shape[1],
        quant_rlms.shape[2],
        1
    ]
)
# Splitting into one piece along axis 0 and taking it leaves the values
# unchanged, but it still adds an op to the graph.
# NOTE(review): a later cell selects layers by index (`i >= 18`), so removing
# this op would presumably shift those indices; confirm before cleaning up.
net = tf.split(
    reshaped_qrlms,
    1,
    axis=0
)[0]
# prep conv mobilenet
# net = layers.Reshape(
#     (params.patch_frames, params.patch_bands, 1),
#     input_shape=(params.patch_frames, params.patch_bands),
# )(split_rqlms)

# mobilenet
# Apply each stage of _YAMNET_LAYER_DEFS in order; stages are named
# "layer1", "layer2", ... (1-based).
for (i, (layer_fun, kernel, stride, filters)) in enumerate(_YAMNET_LAYER_DEFS):
    net = layer_fun(
        "layer{}".format(i + 1),
        kernel,
        stride,
        filters,
        params
    )(net)

# Classifier head: global average pooling -> dense layer with
# `params.num_classes` units -> `params.classifier_activation`.
embeddings = layers.GlobalAveragePooling2D()(net)
logits = layers.Dense(units=params.num_classes, use_bias=True)(embeddings)
predictions = layers.Activation(activation=params.classifier_activation)(logits)

In [None]:
# Wrap the waveform -> class-prediction graph in a Keras model.
yamnet = Model(inputs=waveform, outputs=[predictions], name="yamnet_test")

In [None]:
# Print the layer-by-layer summary of the assembled model.
yamnet.summary()

In [None]:
# Persist the float model under models/3/tf.
yamnet.save("models/3/tf")


In [None]:
# List every layer with its index; a later cell selects layers by index.
for i, layer in enumerate(yamnet.layers):
    print(f"{i} {layer.name}")

Quantization

In [None]:
# Attempt whole-model quantization-aware wrapping.
# NOTE(review): the model contains TFOpLambda layers (see the layer listing);
# presumably tfmot rejects those here, so confirm whether this cell can run.
# `quant_aware_model` is assigned again further down by `quantize_apply`.
quant_aware_model = tfmot.quantization.keras.quantize_model(yamnet)

In [12]:
# Collect (and echo) every layer from index 18 onwards; the resulting names
# are the candidate layers used by the quantization cells below.
names = set()
for i, layer in enumerate(yamnet.layers):
    if i < 18:
        continue
    print(layer)
    names.add(layer.name)

<keras.layers.core.TFOpLambda object at 0x00000226A0B5E630>
<keras.layers.core.TFOpLambda object at 0x00000226A0B5E978>
<keras.layers.convolutional.Conv2D object at 0x00000226A7B70B38>
<keras.layers.normalization.batch_normalization.BatchNormalization object at 0x00000226A0B41160>
<keras.layers.core.TFOpLambda object at 0x00000226A7B74B70>
<keras.layers.convolutional.DepthwiseConv2D object at 0x00000226A7B74470>
<keras.layers.normalization.batch_normalization.BatchNormalization object at 0x00000226A7BCA1D0>
<keras.layers.core.TFOpLambda object at 0x00000226A7BCFBE0>
<keras.layers.convolutional.Conv2D object at 0x00000226A0851400>
<keras.layers.normalization.batch_normalization.BatchNormalization object at 0x00000226A7BD7668>
<keras.layers.core.TFOpLambda object at 0x00000226A7BD74E0>
<keras.layers.convolutional.DepthwiseConv2D object at 0x00000226A7BCF668>
<keras.layers.normalization.batch_normalization.BatchNormalization object at 0x00000226A7BE6C18>
<keras.layers.core.TFOpLambda obje

In [None]:
d8t = tfmot.quantization.keras.default_8bit.default_8bit_transforms
# Transforms handed to the ModelTransformer below. Each entry must be a
# transform *instance*: the transformer calls `pattern()` / `replacement()` on
# the objects it is given, so passing the bare classes would fail.
# NOTE: this assignment rebinds `transforms`, which the import cell bound to
# the tfmot `transforms` module. If this cell is skipped, the cell below passes
# that module to ModelTransformer instead of this list.
transforms = [
    d8t.InputLayerQuantize(),
    d8t.InputLayerQuantize(),
    d8t.Conv2DBatchNormQuantize(),
    d8t.InputLayerQuantize(),
    d8t.SeparableConvQuantize(),
    d8t.SeparableConvQuantize(),
    d8t.SeparableConvQuantize(),
    d8t.SeparableConvQuantize(),
    d8t.SeparableConvQuantize(),
    d8t.SeparableConvQuantize(),
    d8t.SeparableConvQuantize(),
    d8t.SeparableConvQuantize(),
    d8t.SeparableConvQuantize(),
    d8t.SeparableConvQuantize(),
    d8t.SeparableConvQuantize(),
    d8t.SeparableConvQuantize(),
    d8t.SeparableConvQuantize(),
    d8t.InputLayerQuantize(),
    d8t.InputLayerQuantize(),
    d8t.InputLayerQuantize(),
]

In [13]:
# tfmot model transformer, built from the model, the `transforms` list and the
# collected layer names.
# NOTE: `transforms` has to be the list defined in the preceding cell. The
# import cell binds the same name to the tfmot `transforms` module, and if that
# module is passed here, `mt.transform()` fails with
# "TypeError: 'module' object is not iterable".
mt = tfmot.quantization.keras.graph_transformations.model_transformer.ModelTransformer(
    yamnet, transforms, list(names)
)

In [14]:
# Run the graph transformation. The recorded run of this cell ended in
# "TypeError: 'module' object is not iterable".
mt.transform()

TypeError: 'module' object is not iterable

In [None]:
def apply_quant_to_layers(layer):
    """Clone function: annotate selected layers for quantization.

    Args:
        layer: A Keras layer from the model being cloned.

    Returns:
        ``layer`` wrapped by ``quantize_annotate_layer`` when its name is in
        the global ``names`` set, otherwise ``layer`` itself.
    """
    if layer.name not in names:
        return layer
    return tfmot.quantization.keras.quantize_annotate_layer(layer)

In [None]:
# Clone the model, passing every layer through `apply_quant_to_layers` so the
# selected layers come back annotated for quantization.
annotated_model = tf.keras.models.clone_model(yamnet, clone_function=apply_quant_to_layers)

In [None]:
# Turn the annotated clone into a quantization-aware model and show its layers.
# This overwrites the `quant_aware_model` assigned by the earlier
# `quantize_model(yamnet)` cell.
quant_aware_model = tfmot.quantization.keras.quantize_apply(annotated_model)
quant_aware_model.summary()

In [None]:
# ESC-50 metadata: keep only the classes of interest, attach an integer target
# and the full path of every audio file, then build a tf.data pipeline of
# (filename, target, fold) triples.
esc50_csv = './datasets/ESC-50-master/meta/esc50.csv'
base_data_path = './datasets/ESC-50-master/audio/'

my_classes = ['dog', 'cat']
map_class_to_id = {'dog': 0, 'cat': 1}

pd_data = pd.read_csv(esc50_csv)

# Rows whose category is one of `my_classes`.
filtered_pd = pd_data[pd_data.category.isin(my_classes)]

# Integer label per row (every remaining category is a key of the mapping).
class_id = filtered_pd['category'].map(map_class_to_id)
filtered_pd = filtered_pd.assign(target=class_id)

# Replace the bare file name with its path under `base_data_path`.
# `os` comes from the notebook's import cell.
full_path = filtered_pd['filename'].apply(lambda row: os.path.join(base_data_path, row))
filtered_pd = filtered_pd.assign(filename=full_path)

filenames = filtered_pd['filename']
targets = filtered_pd['target']
folds = filtered_pd['fold']

main_ds = tf.data.Dataset.from_tensor_slices((filenames, targets, folds))



In [None]:
# Utility functions for loading audio files and making sure the sample rate is correct.
@tf.function
def load_wav_16k_mono(filename):
    """Load a WAV file as a single-channel float tensor resampled to 16 kHz.

    Args:
        filename: Path of the WAV file to read.

    Returns:
        The decoded waveform with the channel axis squeezed away, resampled
        from the file's own sample rate to 16000 Hz.
    """
    audio, rate = tf.audio.decode_wav(tf.io.read_file(filename), desired_channels=1)
    mono = tf.squeeze(audio, axis=-1)
    # The resampler is given the input rate as int64.
    rate = tf.cast(rate, dtype=tf.int64)
    return tfio.audio.resample(mono, rate_in=rate, rate_out=16000)

@tf.function
def frame_16k_mono(filename):
    """Load `filename` as 16 kHz mono audio and cut it into frames.

    Frames are 15600 samples long with a step of 15600 samples, so they do
    not overlap.
    """
    return tf.signal.frame(load_wav_16k_mono(filename), 15600, 15600)

    
def load_frames_for_map(filename, label, fold):
    """Dataset map function: replace the file name with its audio frames.

    Returns:
        The tuple ``(frames, label, fold)``, where ``frames`` comes from
        ``frame_16k_mono(filename)`` and the other two pass through unchanged.
    """
    return frame_16k_mono(filename), label, fold

def unbatch_frames(frames, label, fold, frame_length=15600):
    """Dataset map function: give every frame of a clip its own label and fold.

    The number of frames is read from the tensor instead of being fixed at 5,
    so clips that yield more or fewer frames no longer fail in the reshape.

    Args:
        frames: Frames of one clip; reshaped to ``(num_frames, frame_length)``.
        label: Label of the clip.
        fold: Fold of the clip.
        frame_length: Samples per frame. Defaults to 15600, the frame size
            used by ``frame_16k_mono``.

    Returns:
        ``(frames, labels, folds)``, where ``labels`` and ``folds`` repeat the
        clip's label and fold once per frame, ready for ``Dataset.unbatch()``.
    """
    frames = tf.reshape(frames, [-1, frame_length])
    num_frames = tf.shape(frames)[0]
    return (
        frames,
        tf.repeat(label, num_frames),
        tf.repeat(fold, num_frames),
    )

# Decode every clip into frames, then flatten so that each dataset element is
# a single (frame, label, fold) triple.
main_ds = main_ds.map(load_frames_for_map).map(unbatch_frames).unbatch()

# split the data
cached_ds = main_ds.cache()
train_ds = cached_ds.filter(lambda frame, label, fold: fold < 4)
val_ds = cached_ds.filter(lambda frame, label, fold: fold == 4)
test_ds = cached_ds.filter(lambda frame, label, fold: fold == 5)


# remove the folds column now that it's not needed anymore
def remove_fold_column(frame, label, fold):
    """Drop the fold from a (frame, label, fold) element."""
    return (frame, label)


train_ds, val_ds, test_ds = (
    split.map(remove_fold_column) for split in (train_ds, val_ds, test_ds)
)

# quantization of weights and activations
def representative_dataset():
    """Yield up to 100 training frames, each wrapped in a one-element list.

    Used as the calibration data source for the TFLite converter.
    """
    yield from ([frame] for frame, label in train_ds.take(100))

In [None]:
# Post-training quantization of the float model, calibrated with
# `representative_dataset` and restricted to the int8 built-in op set.
# NOTE(review): the graph also contains the FFT/log preprocessing ops; confirm
# that they all have int8 kernels, otherwise this conversion may fail.
converter = tf.lite.TFLiteConverter.from_keras_model(yamnet)
converter.representative_dataset = representative_dataset
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
converter.optimizations = [tf.lite.Optimize.DEFAULT]
tflite_quant_model = converter.convert()

In [None]:
# Write the converted TFLite model bytes to disk.
with open('./models/3/ye1.tflite', 'wb') as f:
    f.write(tflite_quant_model)