In [1]:
import csv
import os

import numpy as np
import pandas as pd
import tensorflow as tf
import tensorflow_io as tfio
import tensorflow_model_optimization as tfmot
from tensorflow.keras import Model, layers

import features as features_lib
import params
# Rebind `params` from the imported module to a Params instance; after this line
# the module itself is no longer reachable under the name `params`.
params = params.Params()

In [2]:
# layers
def _batch_norm(name, params):
    """Return a callable that applies a named BatchNormalization layer.

    Args:
        name: Name given to the BatchNormalization layer.
        params: Object providing `batchnorm_center`, `batchnorm_scale` and
            `batchnorm_epsilon`.

    Returns:
        A one-argument function that creates the layer and applies it to
        its input.
    """

    def apply_bn(tensor):
        # The layer is created on every call, using the settings in `params`.
        bn = layers.BatchNormalization(
            epsilon=params.batchnorm_epsilon,
            scale=params.batchnorm_scale,
            center=params.batchnorm_center,
            name=name,
        )
        return bn(tensor)

    return apply_bn


def _conv(name, kernel, stride, filters, params):
    """Return a callable applying Conv2D -> BatchNormalization -> ReLU6.

    Args:
        name: Prefix for the layer names ("<name>/conv", "<name>/conv/bn",
            "<name>/relu6").
        kernel: Kernel size passed to Conv2D.
        stride: Stride passed to Conv2D.
        filters: Number of output filters of the convolution.
        params: Object providing `conv_padding` plus the batch-norm settings
            read by `_batch_norm`.

    Returns:
        A one-argument function mapping an input tensor to the block output.
    """

    def _conv_layer(layer_input):
        output = layers.Conv2D(
            name="{}/conv".format(name),
            filters=filters,
            kernel_size=kernel,
            strides=stride,
            padding=params.conv_padding,
            use_bias=False,
            activation=None,
        )(layer_input)
        output = _batch_norm("{}/conv/bn".format(name), params)(output)
        # ReLU6 is expressed as a Keras layer (ReLU capped at 6) instead of the
        # raw tf.nn.relu6 op, so the activation is a named Keras layer that
        # layer-based tooling (such as the tfmot quantize_model call made later
        # in this notebook) can recognise. The computed values are unchanged.
        output = layers.ReLU(max_value=6.0, name="{}/relu6".format(name))(output)
        return output

    return _conv_layer


def _separable_conv(name, kernel, stride, filters, params):
    """Return a callable applying a depthwise-separable convolution block.

    The block is DepthwiseConv2D -> BatchNormalization -> ReLU6 followed by a
    1x1 Conv2D -> BatchNormalization -> ReLU6.

    Args:
        name: Prefix for the layer names ("<name>/depthwise_conv...",
            "<name>/pointwise_conv...").
        kernel: Kernel size of the depthwise convolution.
        stride: Stride of the depthwise convolution (the pointwise one uses 1).
        filters: Number of output filters of the pointwise convolution.
        params: Object providing `conv_padding` plus the batch-norm settings
            read by `_batch_norm`.

    Returns:
        A one-argument function mapping an input tensor to the block output.
    """

    def _separable_conv_layer(layer_input):
        output = layers.DepthwiseConv2D(
            name="{}/depthwise_conv".format(name),
            kernel_size=kernel,
            strides=stride,
            depth_multiplier=1,
            padding=params.conv_padding,
            use_bias=False,
            activation=None,
        )(layer_input)
        output = _batch_norm("{}/depthwise_conv/bn".format(name), params)(output)
        # ReLU6 as a Keras layer (ReLU capped at 6) rather than the raw
        # tf.nn.relu6 op, so that the activation is a named Keras layer that
        # layer-based tooling (e.g. tfmot quantize_model) can recognise.
        output = layers.ReLU(
            max_value=6.0, name="{}/depthwise_conv/relu6".format(name)
        )(output)
        output = layers.Conv2D(
            name="{}/pointwise_conv".format(name),
            filters=filters,
            kernel_size=(1, 1),
            strides=1,
            padding=params.conv_padding,
            use_bias=False,
            activation=None,
        )(output)
        output = _batch_norm("{}/pointwise_conv/bn".format(name), params)(output)
        output = layers.ReLU(
            max_value=6.0, name="{}/pointwise_conv/relu6".format(name)
        )(output)
        return output

    return _separable_conv_layer


In [3]:
# Layer table; every entry is (layer_function, kernel, stride, num_filters).
# One regular convolution is followed by thirteen separable convolutions, all
# with a 3x3 kernel, so only (stride, num_filters) is spelled out for the latter.
_YAMNET_LAYER_DEFS = [(_conv, [3, 3], 2, 32)] + [
    (_separable_conv, [3, 3], stride, num_filters)
    for stride, num_filters in (
        (1, 64),
        (2, 128),
        (1, 128),
        (2, 256),
        (1, 256),
        (2, 512),
        (1, 512),
        (1, 512),
        (1, 512),
        (1, 512),
        (1, 512),
        (2, 1024),
        (1, 1024),
    )
]

In [4]:
# model definition
# The waveform input has a one-dimensional batch shape of
# params.min_num_samples entries.
waveform = layers.Input(
    name="waveform_binary 0",
    dtype=tf.float32,
    batch_shape=(params.min_num_samples,),
)
log_mel_spectrogram, features = features_lib.waveform_to_log_mel_spectrogram_patches(
    waveform, params
)

# Disabled experiments kept for reference:
#   * quantizing `features` with tf.quantization.quantize (range -2700.0..2700.0,
#     tf.dtypes.quint8, mode='MIN_COMBINED', round_mode='HALF_AWAY_FROM_ZERO',
#     ensure_minimum_range=0.01) and feeding the result to the Reshape below;
#   * casting `features` to tf.int8.

# Append a trailing axis of size 1 to each (patch_frames, patch_bands) patch.
# `r1` keeps a handle on the reshaped tensor; `net` is threaded through the stack.
r1 = layers.Reshape(
    target_shape=(params.patch_frames, params.patch_bands, 1),
    input_shape=(params.patch_frames, params.patch_bands),
)(features)
net = r1

# Layers are named layer1, layer2, ... in table order.
for i, (layer_fun, kernel, stride, filters) in enumerate(_YAMNET_LAYER_DEFS):
    layer_name = f"layer{i + 1}"
    net = layer_fun(layer_name, kernel, stride, filters, params)(net)

# Classifier head: global average pooling, a dense layer, then the configured activation.
embeddings = layers.GlobalAveragePooling2D()(net)
logits = layers.Dense(use_bias=True, units=params.num_classes)(embeddings)
predictions = layers.Activation(activation=params.classifier_activation)(logits)


In [None]:
# tfmot quantizers test
# tfmot.quantization.keras.quantizers.AllValuesQuantizer(
#     8,
#     True,
#     True,
    
# )

In [5]:
# Assemble the waveform -> predictions model, then load the weights stored in 'yamnet.h5'.
yamnet = Model(inputs=waveform, outputs=[predictions], name="yamnet_test")
yamnet.load_weights('yamnet.h5')

In [6]:
# Apply tfmot's default Keras quantization to the model; the returned model is
# not assigned to a name, so it is only shown as the cell output.
# NOTE(review): the recorded run of this cell ended in the AttributeError shown
# in its output ("'EagerTensor' object has no attribute '_keras_history'") -
# presumably because the graph contains tensors that were not produced by Keras
# layers; confirm before relying on this cell.
tfmot.quantization.keras.quantize_model(yamnet)

AttributeError: 'tensorflow.python.framework.ops.EagerTensor' object has no attribute '_keras_history'

In [None]:
# Number of weight arrays in the model, followed by the shape of each one.
weights_list = yamnet.get_weights()
print(len(weights_list))
for i, weights in enumerate(weights_list):
    print(weights.shape)

In [None]:
# List every layer of the model together with its position.
for i, layer in enumerate(yamnet.layers):
    print(f'i: {i}, layer: {layer}')

In [None]:
# Shape of every array in weights_list, one per line
# (a single entry can be inspected with e.g. weights_list[1].shape).
for weight in weights_list:
    weight_shape = weight.shape
    print(weight_shape)

Get Weights from TFLite Edge Model 

In [6]:
# Open the TFLite model stored under models/yamnet/tfhub and allocate its tensors.
# Alternative model tried earlier: "models/yamnet/tflite/yamnet.tflite".
edge_model_path = "models/yamnet/tfhub/cpu.tflite"
yamnet_edge = tf.lite.Interpreter(model_path=edge_model_path)
yamnet_edge.allocate_tensors()

In [7]:
# Snapshot the data of every tensor of the edge model that can currently be read.
# (yamnet_edge.allocate_tensors() must already have been called.)
ye_deets = yamnet_edge.get_tensor_details()
tensors = []
skipped_tensors = []  # (name, error message) of tensors whose data could not be read
for d in ye_deets:
    i = d['index']
    try:
        # tensor(i)() hands back a view into the interpreter's own buffer; copy it
        # so the list does not keep references to interpreter-internal memory.
        tensor = yamnet_edge.tensor(i)().copy()
    except Exception as err:
        # Best effort: keep going when a tensor has no readable data, but record
        # it instead of hiding every error (a bare `except:` would also swallow
        # KeyboardInterrupt).
        skipped_tensors.append((d['name'], str(err)))
    else:
        tensors.append(tensor)

In [13]:
# Shape of every collected tensor, in collection order.
for i, tensor in enumerate(tensors):
    print(tensor.shape)

(15600,)
(1,)
(1,)
(1,)
(15600,)
(2,)
(195, 80)
(96, 5)
(96, 5, 80)
(2,)
(400,)
(2, 2)
(3,)
(96, 1, 512)
(2,)
(96, 1, 257)
(2,)
(96, 257)
(96, 257)
(64, 257)
(64,)
(96, 64)
(96, 64)
(3,)
(1, 96, 64)
(1, 96, 64)
(4,)
(1, 96, 64, 1)
()
(1, 96, 64, 1)
(32, 3, 3, 1)
(32,)
(1, 48, 32, 32)
(1, 3, 3, 32)
(32,)
(1, 48, 32, 32)
(64, 1, 1, 32)
(64,)
(1, 48, 32, 64)
(1, 3, 3, 64)
(64,)
(1, 24, 16, 64)
(128, 1, 1, 64)
(128,)
(1, 24, 16, 128)
(1, 3, 3, 128)
(128,)
(1, 24, 16, 128)
(128, 1, 1, 128)
(128,)
(1, 24, 16, 128)
(1, 3, 3, 128)
(128,)
(1, 12, 8, 128)
(256, 1, 1, 128)
(256,)
(1, 12, 8, 256)
(1, 3, 3, 256)
(256,)
(1, 12, 8, 256)
(256, 1, 1, 256)
(256,)
(1, 12, 8, 256)
(1, 3, 3, 256)
(256,)
(1, 6, 4, 256)
(512, 1, 1, 256)
(512,)
(1, 6, 4, 512)
(1, 3, 3, 512)
(512,)
(1, 6, 4, 512)
(512, 1, 1, 512)
(512,)
(1, 6, 4, 512)
(1, 3, 3, 512)
(512,)
(1, 6, 4, 512)
(512, 1, 1, 512)
(512,)
(1, 6, 4, 512)
(1, 3, 3, 512)
(512,)
(1, 6, 4, 512)
(512, 1, 1, 512)
(512,)
(1, 6, 4, 512)
(1, 3, 3, 512)
(512,)
(1, 

In [9]:
# Name of every tensor reported by the interpreter, one per line.
for d in ye_deets:
    tensor_name = d['name']
    print(tensor_name)

waveform_binary
stft/frame/zeros_like
stft/frame/concat
stft/frame/ones_like
stft/frame/StridedSlice
stft/frame/concat_1
stft/frame/Reshape
stft/frame/add_1
stft/frame/GatherV2;stft/frame/GatherV2/axis
stft/frame/concat_2/values_1
stft/frame/Reshape_3
stft/hann_window/sub_2
stft/mul
stft/rfft/Pad/paddings
stft/rfft/Pad
stft/rfft1
stft/rfft3
stft/rfft
stft/rfft4
stft/rfft2
stft/rfft5
magnitude_spectrogram
mel_spectrogram
add
mel_spectrogram;add
log_mel_spectrogram
Reshape/shape
feature_patch
tfl.quantize
ExpandDims
ExpandDims1
pre_tower/split/split_dim
pre_tower/split
tower0/network/layer1/conv/Conv2D
tower0/network/layer1/conv/BatchNorm/FusedBatchNormV3
tower0/network/layer1/conv/Relu6;tower0/network/layer1/conv/BatchNorm/FusedBatchNormV3;tower0/network/layer2/sepconv/BatchNorm/FusedBatchNormV3;tower0/network/layer2/sepconv/depthwise;tower0/network/layer1/conv/Conv2D
tower0/network/layer2/sepconv/BatchNorm/FusedBatchNormV3;tower0/network/layer2/sepconv/depthwise
tower0/network/layer2/s

Transfer learning

In [None]:
# Locations of the ESC-50 metadata table and of the audio clips it refers to.
base_data_path = './datasets/ESC-50-master/audio/'
esc50_csv = './datasets/ESC-50-master/meta/esc50.csv'

# Read the metadata and preview the first rows.
pd_data = pd.read_csv(filepath_or_buffer=esc50_csv)
pd_data.head()

In [None]:
# Keep only the clips of the classes of interest and prepare them for loading.
my_classes = ['dog', 'cat']
# Class ids follow the order of `my_classes` (dog -> 0, cat -> 1), so the list is
# the single source of truth and the mapping cannot drift away from it.
map_class_to_id = {name: class_index for class_index, name in enumerate(my_classes)}

filtered_pd = pd_data[pd_data.category.isin(my_classes)]

# Add the integer target; every category is in the mapping thanks to the filter above.
class_id = filtered_pd['category'].map(map_class_to_id)
filtered_pd = filtered_pd.assign(target=class_id)

# Turn the bare file names into paths below base_data_path (needs `import os`).
full_path = filtered_pd['filename'].apply(lambda row: os.path.join(base_data_path, row))
filtered_pd = filtered_pd.assign(filename=full_path)

filtered_pd.head(10)

In [None]:
# Build a dataset whose elements are (filename, target, fold) triples taken
# from the filtered table, then show its element spec.
filenames, targets, folds = (
    filtered_pd[column] for column in ('filename', 'target', 'fold')
)

main_ds = tf.data.Dataset.from_tensor_slices((filenames, targets, folds))
main_ds.element_spec

In [None]:
# Utility functions for loading audio files and making sure the sample rate is correct.
@tf.function
def load_wav_16k_mono(filename):
  """Read a WAV file and return it as a single-channel waveform resampled to 16 kHz."""
  audio, source_rate = tf.audio.decode_wav(
      tf.io.read_file(filename),
      desired_channels=1)
  # Drop the trailing channel axis of the single-channel audio.
  mono = tf.squeeze(audio, axis=-1)
  # The input rate is cast to int64 before being passed to the resampler.
  return tfio.audio.resample(
      mono,
      rate_in=tf.cast(source_rate, dtype=tf.int64),
      rate_out=16000)

@tf.function
def frame_16k_mono(filename):
  """Load `filename` as 16 kHz mono audio and cut it into 15600-sample frames.

  The frame step equals the frame length, so consecutive frames do not overlap.
  """
  return tf.signal.frame(
      load_wav_16k_mono(filename),
      frame_length=15600,
      frame_step=15600)

    
def load_frames_for_map(filename, label, fold):
  """Adapter for Dataset.map: swap the filename for its audio frames.

  The label and fold are passed through unchanged.
  """
  return frame_16k_mono(filename), label, fold

# Replace each filename in the dataset with the frames of its audio.
# NOTE: `main_ds` is rebound in place, so running this cell a second time would
# map the already-framed dataset again; rebuild `main_ds` before re-executing.
main_ds = main_ds.map(load_frames_for_map)
main_ds.element_spec


In [None]:
def unbatch_frames(frames, label, fold):
    """Pair every frame of a clip with the clip's label and fold.

    Args:
        frames: Frames of one clip; must hold a whole number of 15600-sample frames.
        label: Class id of the clip.
        fold: Fold number of the clip.

    Returns:
        A tuple (frames, labels, folds): `frames` reshaped to [num_frames, 15600],
        and `label` / `fold` each repeated num_frames times so that all three
        components share the same leading dimension.
    """
    # Derive the frame count from the data instead of assuming exactly 5 frames
    # per clip, so clips of any length are handled.
    frames = tf.reshape(frames, [-1, 15600])
    num_frames = tf.shape(frames)[0]
    return (
        frames,
        tf.repeat(label, num_frames),
        tf.repeat(fold, num_frames),
    )
    
# Expand every clip into its frames and flatten, giving one dataset element per frame.
# NOTE: `main_ds` is rebound in place, so this cell is not safe to run twice
# without rebuilding `main_ds` first.
main_ds = main_ds.map(unbatch_frames).unbatch()
main_ds.element_spec

In [None]:
# split the data by fold: folds below 4 train, fold 4 validates, fold 5 tests
def remove_fold_column(frame, label, fold):
    """Drop the fold column now that it's not needed anymore."""
    return (frame, label)


cached_ds = main_ds.cache()

train_ds = cached_ds.filter(lambda frame, label, fold: fold < 4).map(remove_fold_column)
val_ds = cached_ds.filter(lambda frame, label, fold: fold == 4).map(remove_fold_column)
test_ds = cached_ds.filter(lambda frame, label, fold: fold == 5).map(remove_fold_column)

In [None]:
# Materialise the whole training split as NumPy values; the cell output
# therefore contains every (frame, label) element of the split.
list(train_ds.as_numpy_iterator())

In [None]:
# quantization of weights and activations
def representative_dataset():
    """Yield at most 100 training frames, each wrapped in a one-element list.

    The labels of the training split are ignored.
    """
    yield from ([frame] for frame, _label in train_ds.take(100))

# Peek at a single (frame, label) element of the training split.
list(train_ds.take(1))

# Alternative check: pull the first sample straight from the generator.
# next(representative_dataset())

In [None]:
# Convert the Keras model to TFLite with default optimizations, using
# representative_dataset to supply sample inputs.
converter = tf.lite.TFLiteConverter.from_keras_model(yamnet)
converter.optimizations = [tf.lite.Optimize.DEFAULT]
converter.representative_dataset = representative_dataset
# Restrict the converter to the int8 builtin op set.
# NOTE(review): presumably every op in the graph, including the feature
# extraction in front of the CNN, then needs an int8 kernel - confirm that
# convert() succeeds with this setting.
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS_INT8]
tflite_quant_model = converter.convert()

In [None]:
# Write the converted quantized model to disk.
with open('./models/3/quant_test.tflite', mode='wb') as model_file:
    model_file.write(tflite_quant_model)

In [None]:
# try to isolate just the cnn
# Stand-alone input with batch shape (1, 96, 64), meant to replace the
# feature-extraction front end.
dummy_input = layers.Input(
    batch_shape=(1, 96, 64),
    dtype=tf.float32,
    name="dummy input"
)
# r1(dummy_input)
# NOTE(review): `r1` is the tensor returned by calling the Reshape layer in the
# model-definition cell, not a layer object, while tf.keras.Sequential takes a
# list of layers - presumably this cell fails as written. The commented-out
# keyword arguments below suggest that a functional Model ending at `logits`
# was intended; confirm the intended sub-model before fixing.
cnn = tf.keras.Sequential(
    [
        dummy_input,
        r1
    ],
    # name="yamnet_cnn",
    # inputs=dummy_input,
    # outputs=[logits]
)

In [None]:
# convert to tflite
# NOTE(review): `model` is not defined in any visible cell of this notebook -
# presumably it is left over from a deleted cell; confirm which model (e.g.
# `yamnet` or `cnn`) is meant to be converted here.
converter = tf.lite.TFLiteConverter.from_keras_model(model)
# converter = tf.lite.TFLiteConverter.from_keras_model(yamnet)
tflite_model = converter.convert()
# Save the converted model to models/3/me/features.tflite
# (earlier target: models/3/me/yamnet_quant.tflite).
# with open('models/3/me/yamnet_quant.tflite', 'wb') as f:
with open('models/3/me/features.tflite', 'wb') as f:
    f.write(tflite_model)