In [19]:
import tensorflow as tf
import numpy as np
import librosa
import os

# --- Parameters (must match your training pipeline) ---
desired_time_seconds = 4.0
sample_rate = 22050  # This is the default from librosa.load()
num_mel_bins = 128
class_names = ['air_conditioner', 'car_horn', 'children_playing', 'dog_bark', 'drilling', 'engine_idling', 'gun_shot', 'jackhammer', 'siren', 'street_music']


In [20]:

# --- Step 1: Load the TFLite model ---
interpreter = tf.lite.Interpreter(model_path='C:\\Users\\ebd-marco\\Documents\\homeaudio_ml\\notebooks\\sound_classifier_quantized.tflite')
interpreter.allocate_tensors()


    TF 2.20. Please use the LiteRT interpreter from the ai_edge_litert package.
    See the [migration guide](https://ai.google.dev/edge/litert/migration)
    for details.
    


In [21]:

# Get input and output details
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()
input_shape = input_details[0]['shape']

# DEBUG PRINT: What does the interpreter expect?
print(f"DEBUG: Interpreter expects input shape: {input_details[0]['shape']}")
print(f"DEBUG: Interpreter expects input dtype: {input_details[0]['dtype']}")

DEBUG: Interpreter expects input shape: [  1 128 173   1]
DEBUG: Interpreter expects input dtype: <class 'numpy.float32'>


In [22]:

# --- Step 2: Load and preprocess a test audio file ---
# Replace 'your_audio_file.wav' with the path to a file you want to test
test_audio_path = 'C:\\Users\\ebd-marco\\Documents\\homeaudio_ml\\notebooks\\your_audio_file.mp3' 

if not os.path.exists(test_audio_path):
    print(f"Error: The audio file '{test_audio_path}' does not exist.")
    exit()


In [23]:

# Load the audio and resample to the correct rate
audio, _ = librosa.load(test_audio_path, sr=sample_rate)


In [24]:

# Pad or truncate the audio to the desired length
desired_samples = int(desired_time_seconds * sample_rate)
if len(audio) < desired_samples:
    samples_to_pad = desired_samples - len(audio)
    formatted_audio = np.pad(audio, (0, samples_to_pad), 'constant')
else:
    formatted_audio = audio[:desired_samples]


In [25]:

# Generate the Mel spectrogram
mel_spectrogram = librosa.feature.melspectrogram(y=formatted_audio, sr=sample_rate, n_mels=num_mel_bins)

# Convert to decibel scale
mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)

# Add batch and channel dimensions to match the model's input shape
input_data = mel_spectrogram_db[np.newaxis, ..., np.newaxis].astype(np.float32)

# DEBUG PRINT: What does the prepared data look like?
print(f"\nDEBUG: Prepared input data shape: {input_data.shape}")
print(f"DEBUG: Prepared input data dtype: {input_data.dtype}")
print(f"DEBUG: Prepared input data min/max values: {input_data.min()} / {input_data.max()}")


DEBUG: Prepared input data shape: (1, 128, 173, 1)
DEBUG: Prepared input data dtype: float32
DEBUG: Prepared input data min/max values: -80.0 / 3.814697265625e-06


In [26]:

# --- Step 3: Run Inference ---
interpreter.set_tensor(input_details[0]['index'], input_data)
interpreter.invoke()



In [27]:

# --- Step 4: Get and interpret the output ---
output_data = interpreter.get_tensor(output_details[0]['index'])
predicted_class_id = np.argmax(output_data)
predicted_class_name = class_names[predicted_class_id]

# DEBUG PRINT: What did the output tensor look like before interpretation?
print(f"\nDEBUG: Raw output tensor shape: {output_data.shape}")
print(f"DEBUG: Raw output tensor dtype: {output_data.dtype}")
print(f"DEBUG: Raw output tensor values: {output_data}")


DEBUG: Raw output tensor shape: (1, 10)
DEBUG: Raw output tensor dtype: float32
DEBUG: Raw output tensor values: [[0.1015625 0.1015625 0.1015625 0.1015625 0.1015625 0.1015625 0.1015625
  0.1015625 0.1015625 0.1015625]]


In [28]:
# Print the results
print(f"Predicted class ID: {predicted_class_id}")
print(f"Predicted class name: {predicted_class_name}")
print(f"Model output probabilities: {output_data}")

Predicted class ID: 0
Predicted class name: air_conditioner
Model output probabilities: [[0.1015625 0.1015625 0.1015625 0.1015625 0.1015625 0.1015625 0.1015625
  0.1015625 0.1015625 0.1015625]]
