In [None]:
import tensorflow as tf
import tensorflow_io as tfio
import tflite_model_maker as mm
from tflite_model_maker import audio_classifier
import os

import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

import itertools
import glob
import random

from IPython.display import Audio, Image
from scipy.io import wavfile

# Report the installed library versions so a run can be tied to its environment.
print(f"TensorFlow Version: {tf.__version__}")
print(f"Model Maker Version: {mm.__version__}")

In [None]:

# Dataset root. Set the HEART_SOUNDS_DIR environment variable to run the
# notebook on another machine; the original location remains the default.
data_dir = os.environ.get('HEART_SOUNDS_DIR', '/home/poom/My/ML/Datasets/heart_sounds_ex')

# Class names of the dataset (not referenced by the other visible cells).
my_class_names = ['normal', 'abnormal']

# Absolute glob pattern matching every .wav file exactly one folder level
# below data_dir.
test_files = os.path.abspath(os.path.join(data_dir, '*', '*.wav'))

def get_random_audio_file():
  """Return the path of one randomly chosen file matching `test_files`.

  Candidates are the paths matching the module-level `test_files` glob
  pattern. The chosen path is printed before it is returned.

  Returns:
    The selected file path as a string.

  Raises:
    IndexError: If no file matches `test_files`. This is the same exception
      type random.choice raises on an empty list, but the message now names
      the pattern that matched nothing.
  """
  # glob order is filesystem-dependent; sorting makes the pick repeatable
  # when the `random` module has been seeded.
  candidates = sorted(glob.glob(test_files))
  if not candidates:
    raise IndexError(f'No audio files match pattern: {test_files}')
  random_audio_path = random.choice(candidates)
  print(random_audio_path)
  return random_audio_path


def show_sound_data(audio_path):
  """Plot a WAV file's samples and display an inline audio player for it.

  Also prints the name of the file's parent folder as the "heart sound code".

  Args:
    audio_path: Path to a .wav file.
  """
  # wavfile.read's second positional parameter is `mmap`, not a file mode;
  # the former 'rb' argument was truthy and silently enabled memory-mapping.
  sample_rate, audio_data = wavfile.read(audio_path)

  # Parent folder name, taken with os.path so it does not depend on '/'
  # being the path separator.
  code = os.path.basename(os.path.dirname(audio_path))
  print(f'Heart sound code: {code}')

  plt.plot(audio_data)
  display(Audio(audio_data, rate=sample_rate))

# Marker that the helper definitions in this cell finished executing.
print('functions and data structures created')

In [None]:
# Pick one recording at random, then plot it and embed an audio player.
random_audio = get_random_audio_file()
show_sound_data(random_audio)

In [None]:
# Frame sizes are expressed as multiples of the spec's
# EXPECTED_WAVEFORM_LENGTH. The step is half the frame length, so
# consecutive frames overlap by 50%.
waveform_length = audio_classifier.YamNetSpec.EXPECTED_WAVEFORM_LENGTH

# keep_yamnet_and_custom_heads=True: the serving model is later unpacked into
# two outputs (YAMNet scores and custom-head scores) — presumably enabled by
# this flag; confirm against the Model Maker docs.
spec = audio_classifier.YamNetSpec(
    keep_yamnet_and_custom_heads=True,
    frame_step=3 * waveform_length,
    frame_length=6 * waveform_length)

In [None]:
# Load the dataset rooted at data_dir (shuffled, with caching enabled).
total_data = audio_classifier.DataLoader.from_folder(
    spec, data_dir, cache=True, shuffle=True)

# split(0.8) followed by split(0.5) on the remainder — presumably an
# 80% / 10% / 10% train / validation / test partition; confirm split semantics.
train_data, temp_data = total_data.split(0.8)
val_data, test_data = temp_data.split(0.5)

split_sizes = [len(subset) for subset in (train_data, val_data, test_data)]
print("train_data: {0}, val_data: {1}, test_data: {2}".format(*split_sizes))
print(total_data.index_to_label)

In [None]:
# Training hyperparameters, forwarded unchanged to audio_classifier.create.
batch_size = 128
epochs = 100

print('Training the model')
# Train using train_data and the YAMNet spec; val_data is passed as the third
# positional argument (presumably the validation set — confirm against the API).
model = audio_classifier.create(
    train_data,
    spec,
    val_data,
    batch_size=batch_size,
    epochs=epochs)

In [None]:
print('Evaluating the model')
# Evaluate on test_data, the split that was not passed to training.
model.evaluate(test_data)

In [None]:
def show_confusion_matrix(confusion, test_labels):
  """Plot a row-normalized confusion matrix as an annotated heatmap.

  Each row is divided by its own total, so cell (i, j) is the fraction of
  row i's samples that fall in column j. Rows are labelled "True label" and
  columns "Predicted label" on the plot.

  Args:
    confusion: 2-D array-like of raw counts.
    test_labels: Sequence of class names used as tick labels on both axes.
  """
  counts = np.asarray(confusion, dtype="float")
  # keepdims=True keeps the totals as a column vector so each row is divided
  # by its own sum. A flat (n,) vector would broadcast along columns instead,
  # dividing cell (i, j) by the total of row j.
  row_totals = counts.sum(axis=1, keepdims=True)
  # Rows without any samples stay all-zero instead of turning into NaN.
  confusion_normalized = np.divide(
      counts, row_totals, out=np.zeros_like(counts), where=row_totals != 0)
  sns.heatmap(
      confusion_normalized, xticklabels=test_labels, yticklabels=test_labels,
      cmap='Blues', annot=True, fmt='.2f', square=True)
  plt.title("Confusion matrix")
  plt.ylabel("True label")
  plt.xlabel("Predicted label")

# Compute the confusion matrix on the test split and plot it; .numpy()
# converts the returned value to an array before it is handed to the plotter.
confusion_matrix = model.confusion_matrix(test_data)
show_confusion_matrix(confusion_matrix.numpy(), test_data.index_to_label)

In [None]:
# Build the serving variant of the trained model; later cells call it
# directly on individual audio windows.
serving_model = model.create_serving_model()

# Print the model's input and output tensor descriptions for reference.
print(f'Model\'s input shape and type: {serving_model.inputs}')
print(f'Model\'s output shape and type: {serving_model.outputs}')

In [None]:
# Re-run this cell to pick and preview another random audio file.
random_audio = get_random_audio_file()
show_sound_data(random_audio)

In [None]:
# wavfile.read's second positional parameter is `mmap`, not a file mode, so
# the former 'rb' argument only switched on memory-mapping by accident.
sample_rate, audio_data = wavfile.read(random_audio)

# Scale by the int16 maximum; assumes the file holds 16-bit PCM samples —
# TODO confirm for this dataset.
audio_data = np.array(audio_data) / tf.int16.max
input_size = serving_model.input_shape[1]

# Non-overlapping windows of input_size samples (step == length); the last
# window is zero-padded to full length.
splitted_audio_data = tf.signal.frame(audio_data, input_size, input_size, pad_end=True, pad_value=0)

print(f'Test audio path: {random_audio}')
print(f'Original size of the audio data: {len(audio_data)}')
print(f'Number of windows for inference: {len(splitted_audio_data)}')

In [None]:
print(random_audio)

# Loop-invariant: fetch the YAMNet label list once instead of twice per window.
yamnet_labels = spec._yamnet_labels()

results = []
print('Result of the window ith:  your model class -> score,  (spec class -> score)')
for i, data in enumerate(splitted_audio_data):
  # The serving model yields two outputs per window: the YAMNet scores and
  # the custom-head scores.
  yamnet_output, inference = serving_model(data)
  results.append(inference[0].numpy())
  result_index = tf.argmax(inference[0])
  spec_result_index = tf.argmax(yamnet_output[0])
  result_str = f'Result of the window {i}: ' \
  f'\t{test_data.index_to_label[result_index]} -> {inference[0][result_index].numpy():.3f}, ' \
  f'\t({yamnet_labels[spec_result_index]} -> {yamnet_output[0][spec_result_index].numpy():.3f})'
  print(result_str)


# Average the custom-head scores over all windows and report the top class.
results_np = np.array(results)
mean_results = results_np.mean(axis=0)
result_index = mean_results.argmax()
print(f'Mean result: {test_data.index_to_label[result_index]} -> {mean_results[result_index]}')

In [None]:
# Output directory for exported artifacts, relative to the working directory.
models_path = './heart_sound_models'
print(f'Exporing the TFLite model to {models_path}')

# Export the trained model under an explicit TFLite file name.
model.export(models_path, tflite_filename='my_heart_sound_model.tflite')

In [None]:
# Export again into models_path, this time requesting the SAVED_MODEL and
# LABEL export formats (presumably a TensorFlow SavedModel plus the class-label
# list — confirm against the Model Maker docs).
model.export(models_path, export_format=[mm.ExportFormat.SAVED_MODEL, mm.ExportFormat.LABEL])