<a href="https://colab.research.google.com/github/shaja-asm/cry-detection/blob/main/tf_lite_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import librosa
import librosa.display
# import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from tensorflow.keras.utils import Sequence
import datetime
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, TensorBoard, EarlyStopping
from tensorflow.keras.optimizers import Adam
from scipy.ndimage import zoom
import ctypes

# gpus = tf.config.experimental.list_physical_devices('GPU')
# if gpus:
#     try:
#         for gpu in gpus:
#             tf.config.experimental.set_memory_growth(gpu, True)
#     except RuntimeError as e:
#         print(e)
# print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

2024-08-20 12:40:29.401064: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-20 12:40:29.683500: I external/local_xla/xla/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-08-20 12:40:29.960370: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:485] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-08-20 12:40:30.199391: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:8454] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-08-20 12:40:30.265215: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1452] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-08-20 12:40:30.737707: I tensorflow/core/platform/cpu_feature_gu

In [2]:
AUDIO_PATH = 'CryCorpusFinal'
CRY_FOLDER = os.path.join(AUDIO_PATH, 'cry')
NOTCRY_FOLDER = os.path.join(AUDIO_PATH, 'notcry')
IMG_SIZE = (64, 64)
BATCH_SIZE = 32
EPOCHS = 25

In [3]:
def load_audio_files(folder):
    files = []
    for filename in os.listdir(folder):
        if filename.endswith('.wav'):
            files.append(os.path.join(folder, filename))
    return files

def compute_spectrogram(y, sr, n_fft=2048, hop_length=512):
    D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
    D_dB = librosa.amplitude_to_db(np.abs(D), ref=np.max)
    return D_dB

def save_spectrogram_to_disk(D_dB, save_path):
    if not os.path.exists(os.path.dirname(save_path)):
        os.makedirs(os.path.dirname(save_path))
    np.save(save_path, D_dB)


In [4]:
cry_files = load_audio_files(CRY_FOLDER)
notcry_files = load_audio_files(NOTCRY_FOLDER)

data = []
labels = []

for idx, file in enumerate(cry_files):
    y, sr = librosa.load(file, sr=None)
    y = librosa.util.normalize(y)
    D_dB = compute_spectrogram(y, sr)
    save_path = os.path.join(f'{0}/spectrograms'.format(AUDIO_PATH), f'cry_{idx}.npy')
    save_spectrogram_to_disk(D_dB, save_path)
    data.append(save_path)
    labels.append(1)

for idx, file in enumerate(notcry_files):
    y, sr = librosa.load(file, sr=None)
    y = librosa.util.normalize(y)
    D_dB = compute_spectrogram(y, sr)
    save_path = os.path.join(f'{0}/spectrograms'.format(AUDIO_PATH), f'notcry_{idx}.npy')
    save_spectrogram_to_disk(D_dB, save_path)
    data.append(save_path)
    labels.append(0)

data = np.array(data)
labels = np.array(labels)



In [5]:
# Split the datasets
X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.2, random_state=42)

class OnTheFlyDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, file_paths, labels, batch_size, img_size, shuffle=True, augment=False):
        self.file_paths = file_paths
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.shuffle = shuffle
        self.augment = augment
        self.indices = np.arange(len(self.file_paths))
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.file_paths) / self.batch_size))

    def __getitem__(self, index):
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_file_paths = [self.file_paths[i] for i in batch_indices]
        batch_labels = [self.labels[i] for i in batch_indices]

        X, y = self.__data_generation(batch_file_paths, batch_labels)
        return X, y

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __data_generation(self, batch_file_paths, batch_labels):
        X = np.empty((len(batch_file_paths), *self.img_size, 1), dtype=np.float32)
        y = np.empty((len(batch_file_paths),), dtype=int)

        for i, file_path in enumerate(batch_file_paths):
            D_dB = np.load(file_path)
            D_dB = D_dB[..., np.newaxis]  # Add channel dimension

            # Resizing
            zoom_factors = [self.img_size[0] / D_dB.shape[0], self.img_size[1] / D_dB.shape[1], 1]
            D_dB = zoom(D_dB, zoom_factors, order=3)  # order=3 for cubic interpolation

            if self.augment:
                if np.random.rand() > 0.5:
                    D_dB = np.flip(D_dB, axis=1)  # Flip left-right
                if np.random.rand() > 0.5:
                    D_dB = np.flip(D_dB, axis=0)  # Flip up-down
                if np.random.rand() > 0.5:
                    D_dB = D_dB + np.random.uniform(-0.2, 0.2, size=D_dB.shape)  # Random brightness

            X[i,] = D_dB
            y[i] = batch_labels[i]

        return X, y

train_generator = OnTheFlyDataGenerator(X_train, y_train, BATCH_SIZE, IMG_SIZE, shuffle=True, augment=True)
val_generator = OnTheFlyDataGenerator(X_val, y_val, BATCH_SIZE, IMG_SIZE, shuffle=False, augment=False)

# l2 regularization
l2_regularizer = tf.keras.regularizers.l2(0.001)

model = Sequential([
        Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_SIZE[0], IMG_SIZE[1], 1), kernel_regularizer=l2_regularizer),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),
        Conv2D(64, (3, 3), activation='relu', kernel_regularizer=l2_regularizer),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),
        Conv2D(128, (3, 3), activation='relu', kernel_regularizer=l2_regularizer),
        BatchNormalization(),
        MaxPooling2D((2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(128, activation='relu', kernel_regularizer=l2_regularizer),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])

optimizer = Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1, profile_batch='500,520')
checkpoint_callback = ModelCheckpoint('cry_detection_model.keras', monitor='val_loss', save_best_only=True, mode='min')
lr_callback = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
class_weights = {0: 1., 1: 1.}

history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=val_generator,
    class_weight=class_weights,
    callbacks=[tensorboard_callback, checkpoint_callback, lr_callback, early_stopping_callback]
)


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
2024-08-20 12:41:23.213595: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:103] Profiler session initializing.
2024-08-20 12:41:23.213714: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:118] Profiler session started.
2024-08-20 12:41:23.216596: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:130] Profiler session tear down.


Epoch 1/25


  self._warn_if_super_not_called()
2024-08-20 12:41:28.259704: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 15745024 exceeds 10% of free system memory.
2024-08-20 12:41:28.333404: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 15745024 exceeds 10% of free system memory.
2024-08-20 12:41:28.346625: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 15745024 exceeds 10% of free system memory.
2024-08-20 12:41:28.420935: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 14929920 exceeds 10% of free system memory.
2024-08-20 12:41:28.422494: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 14929920 exceeds 10% of free system memory.


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m36s[0m 498ms/step - accuracy: 0.6325 - loss: 1.3490 - val_accuracy: 0.5643 - val_loss: 1.8692 - learning_rate: 1.0000e-04
Epoch 2/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m31s[0m 454ms/step - accuracy: 0.7972 - loss: 0.8497 - val_accuracy: 0.5783 - val_loss: 0.9490 - learning_rate: 1.0000e-04
Epoch 3/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 424ms/step - accuracy: 0.8408 - loss: 0.7354 - val_accuracy: 0.8775 - val_loss: 0.6990 - learning_rate: 1.0000e-04
Epoch 4/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 410ms/step - accuracy: 0.8582 - loss: 0.7194 - val_accuracy: 0.8735 - val_loss: 0.6553 - learning_rate: 1.0000e-04
Epoch 5/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 421ms/step - accuracy: 0.9015 - loss: 0.6124 - val_accuracy: 0.8855 - val_loss: 0.6091 - learning_rate: 1.0000e-04
Epoch 6/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[

2024-08-20 12:45:17.734083: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:103] Profiler session initializing.
2024-08-20 12:45:17.734148: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:118] Profiler session started.


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m29s[0m 438ms/step - accuracy: 0.8994 - loss: 0.6004 - val_accuracy: 0.8795 - val_loss: 0.7219 - learning_rate: 1.0000e-04
Epoch 9/25
[1m15/63[0m [32m━━━━[0m[37m━━━━━━━━━━━━━━━━[0m [1m31s[0m 656ms/step - accuracy: 0.8959 - loss: 0.6353

2024-08-20 12:45:36.198490: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:68] Profiler session collecting data.


[1m16/63[0m [32m━━━━━[0m[37m━━━━━━━━━━━━━━━[0m [1m33s[0m 711ms/step - accuracy: 0.8972 - loss: 0.6331

2024-08-20 12:45:36.853588: I external/local_tsl/tsl/profiler/lib/profiler_session.cc:130] Profiler session tear down.
2024-08-20 12:45:36.870334: I external/local_tsl/tsl/profiler/rpc/client/save_profile.cc:147] Collecting XSpace to repository: logs/fit/20240820-124123/plugins/profile/2024_08_20_12_45_36/TEC-LAP-47.xplane.pb


[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m41s[0m 616ms/step - accuracy: 0.9112 - loss: 0.6025 - val_accuracy: 0.8554 - val_loss: 0.8373 - learning_rate: 1.0000e-04
Epoch 10/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m38s[0m 573ms/step - accuracy: 0.9070 - loss: 0.5977 - val_accuracy: 0.8735 - val_loss: 0.7531 - learning_rate: 1.0000e-04
Epoch 11/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m35s[0m 519ms/step - accuracy: 0.9074 - loss: 0.6040 - val_accuracy: 0.8795 - val_loss: 0.7400 - learning_rate: 5.0000e-05
Epoch 12/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m30s[0m 447ms/step - accuracy: 0.9155 - loss: 0.5742 - val_accuracy: 0.8414 - val_loss: 0.9216 - learning_rate: 5.0000e-05
Epoch 13/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 420ms/step - accuracy: 0.9110 - loss: 0.5748 - val_accuracy: 0.8313 - val_loss: 1.0146 - learning_rate: 5.0000e-05
Epoch 14/25
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━

In [6]:
y_pred = model.predict(val_generator)
y_pred = (y_pred > 0.5).astype(int)
acc = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

print(f'Accuracy: {acc}')
print(f'F1 Score: {f1}')

model.save('cry_detection_model.keras')

[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 314ms/step
Accuracy: 0.8855421686746988
F1 Score: 0.8922495274102079


In [7]:
import pathlib
tflite_models_dir = pathlib.Path("tflite_models")
tflite_models_dir.mkdir(exist_ok=True, parents=True)


converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

tflite_model_file = tflite_models_dir/"cry_detection_model.tflite"
tflite_model_file.write_bytes(tflite_model)

converter.optimizations = [tf.lite.Optimize.DEFAULT]

tflite_fp16_model = converter.convert()
tflite_model_fp16_file = tflite_models_dir/"cry_detection_model_quant.tflite"
tflite_model_fp16_file.write_bytes(tflite_fp16_model)

# converter.target_spec.supported_types = [tf.float16]
# tflite_quant_model = converter.convert()
# tflite_model_quant_file = tflite_models_dir/"cry_detection_model_quant_f16.tflite"
# tflite_model_fp16_file.write_bytes(tflite_fp16_model)


INFO:tensorflow:Assets written to: /tmp/tmpwyju85de/assets


INFO:tensorflow:Assets written to: /tmp/tmpwyju85de/assets


Saved artifact at '/tmp/tmpwyju85de'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 64, 64, 1), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  139825537743184: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538056784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538059600: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538059248: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825537737728: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538060128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538064880: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538062944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538063296: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538064704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538064352:

W0000 00:00:1724138374.061483     956 tf_tfl_flatbuffer_helpers.cc:392] Ignored output_format.
W0000 00:00:1724138374.063647     956 tf_tfl_flatbuffer_helpers.cc:395] Ignored drop_control_dependency.
2024-08-20 12:49:34.072616: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpwyju85de
2024-08-20 12:49:34.073923: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2024-08-20 12:49:34.073939: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmpwyju85de
2024-08-20 12:49:34.094941: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:388] MLIR V1 optimization pass is not enabled
2024-08-20 12:49:34.097562: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2024-08-20 12:49:34.248222: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmpwyju85de
2024-08-20 12:49:34.270595: I tensorflow/cc/saved_model/loader.cc

INFO:tensorflow:Assets written to: /tmp/tmpr8t26h9m/assets


INFO:tensorflow:Assets written to: /tmp/tmpr8t26h9m/assets


Saved artifact at '/tmp/tmpr8t26h9m'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 64, 64, 1), dtype=tf.float32, name='keras_tensor')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  139825537743184: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538056784: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538059600: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538059248: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825537737728: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538060128: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538064880: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538062944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538063296: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538064704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  139825538064352:

W0000 00:00:1724138375.563184     956 tf_tfl_flatbuffer_helpers.cc:392] Ignored output_format.
W0000 00:00:1724138375.563234     956 tf_tfl_flatbuffer_helpers.cc:395] Ignored drop_control_dependency.
2024-08-20 12:49:35.563446: I tensorflow/cc/saved_model/reader.cc:83] Reading SavedModel from: /tmp/tmpr8t26h9m
2024-08-20 12:49:35.565338: I tensorflow/cc/saved_model/reader.cc:52] Reading meta graph with tags { serve }
2024-08-20 12:49:35.565377: I tensorflow/cc/saved_model/reader.cc:147] Reading SavedModel debug info (if present) from: /tmp/tmpr8t26h9m
2024-08-20 12:49:35.577327: I tensorflow/cc/saved_model/loader.cc:236] Restoring SavedModel bundle.
2024-08-20 12:49:35.652843: I tensorflow/cc/saved_model/loader.cc:220] Running initialization op on SavedModel bundle at path: /tmp/tmpr8t26h9m
2024-08-20 12:49:35.674558: I tensorflow/cc/saved_model/loader.cc:462] SavedModel load for tags { serve }; Status: success: OK. Took 111116 microseconds.


696328

In [8]:
interpreter = tf.lite.Interpreter(model_path="tflite_models/cry_detection_model_quant.tflite")
interpreter.allocate_tensors()

# Get input and output tensors
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

def preprocess_audio(file_path, img_size):
    y, sr = librosa.load(file_path, sr=None)
    y = librosa.util.normalize(y)
    D = librosa.stft(y, n_fft=2048, hop_length=512)
    D_dB = librosa.amplitude_to_db(np.abs(D), ref=np.max)

    # Calculate zoom factors for resizing
    zoom_factors = [img_size[0] / D_dB.shape[0], img_size[1] / D_dB.shape[1]]
    D_dB_resized = zoom(D_dB, zoom_factors, order=3)  # order=3 for cubic interpolation

    # Add channel dimension to match the original function's output
    D_dB_resized = D_dB_resized[..., np.newaxis]

    return D_dB_resized

def predict(file_path, img_size=IMG_SIZE):
    input_data = preprocess_audio(file_path, img_size)
    input_data = np.expand_dims(input_data, axis=0).astype(np.float32)

    # Set the tensor to point to the input data to be inferred
    interpreter.set_tensor(input_details[0]['index'], input_data)

    # Run inference
    interpreter.invoke()

    output_data = interpreter.get_tensor(output_details[0]['index'])

    return output_data

def process_folder(folder_path, img_size=IMG_SIZE):
    correct_predictions = 0
    total_files = 0
    results = []

    for file_name in os.listdir(folder_path):
        if file_name.endswith('.wav'):
            file_path = os.path.join(folder_path, file_name)
            prediction = predict(file_path, img_size)
            prediction_label = 'Cry' if prediction > 0.5 else 'Not Cry'
            results.append((file_name, prediction_label))
            ground_truth = 'Cry' if '_cry.wav' in file_name else 'Not Cry'

            if prediction_label == ground_truth:
                correct_predictions += 1

            total_files += 1

    accuracy = (correct_predictions / total_files) * 100 if total_files > 0 else 0

    return results, accuracy

folder_path = '{0}/Test'.format(AUDIO_PATH)
predictions, accuracy = process_folder(folder_path)

for file_name, prediction_label in predictions:
    print(f"File: {file_name}, Prediction: {prediction_label}")

print(f"Prediction Accuracy: {accuracy:.2f}%")


INFO: Created TensorFlow Lite XNNPACK delegate for CPU.


File: P19_612_notcry.wav, Prediction: Not Cry
File: P26_829_cry.wav, Prediction: Cry
File: P29_2405_cry.wav, Prediction: Cry
File: P29_62_cry.wav, Prediction: Cry
File: P26_7_cry.wav, Prediction: Cry
File: P36_14_notcry.wav, Prediction: Not Cry
File: P29_35_cry.wav, Prediction: Cry
File: P29_1714_cry.wav, Prediction: Cry
File: P29_724_cry.wav, Prediction: Cry
File: P26_9_cry.wav, Prediction: Cry
File: P29_348_cry.wav, Prediction: Cry
File: P20_388_cry.wav, Prediction: Not Cry
File: P26_824_cry.wav, Prediction: Cry
File: P29_773_cry.wav, Prediction: Cry
File: P29_1564_cry.wav, Prediction: Not Cry
File: P19_607_notcry.wav, Prediction: Not Cry
File: P29_1873_cry.wav, Prediction: Cry
File: P20_895_cry.wav, Prediction: Cry
File: P20_802_cry.wav, Prediction: Cry
File: P29_2090_cry.wav, Prediction: Cry
File: P20_919_cry.wav, Prediction: Not Cry
File: P17_41_cry.wav, Prediction: Cry
File: P29_1452_cry.wav, Prediction: Cry
File: P36_52_notcry.wav, Prediction: Not Cry
File: P29_11_cry.wav, Predi

In [9]:
lib = ctypes.cdll.LoadLibrary('{0}/libtensorflowlite_c.so'.format(AUDIO_PATH))

# Define types for the C API functions
lib.TfLiteModelCreate.restype = ctypes.POINTER(ctypes.c_void_p)
lib.TfLiteInterpreterCreate.restype = ctypes.POINTER(ctypes.c_void_p)
lib.TfLiteInterpreterOptionsCreate.restype = ctypes.POINTER(ctypes.c_void_p)
lib.TfLiteInterpreterOptionsSetNumThreads.argtypes = [ctypes.POINTER(ctypes.c_void_p), ctypes.c_int]
lib.TfLiteInterpreterOptionsDelete.argtypes = [ctypes.POINTER(ctypes.c_void_p)]
lib.TfLiteInterpreterDelete.argtypes = [ctypes.POINTER(ctypes.c_void_p)]
lib.TfLiteModelDelete.argtypes = [ctypes.POINTER(ctypes.c_void_p)]
lib.TfLiteInterpreterGetInputTensor.restype = ctypes.POINTER(ctypes.c_void_p)
lib.TfLiteInterpreterGetOutputTensor.restype = ctypes.POINTER(ctypes.c_void_p)

model_path = b"tflite_models/cry_detection_model_quant.tflite"
with open(model_path, 'rb') as f:
    model_data = f.read()

model = lib.TfLiteModelCreate(ctypes.c_char_p(model_data), ctypes.c_size_t(len(model_data)))

# Create interpreter options and set number of threads
options = lib.TfLiteInterpreterOptionsCreate()

# Set number of threads (e.g., 2 threads)
lib.TfLiteInterpreterOptionsSetNumThreads(options, 2)

# Create the interpreter with the custom options
interpreter = lib.TfLiteInterpreterCreate(model, options)

# Allocate tensors
status = lib.TfLiteInterpreterAllocateTensors(interpreter)

# Get input and output tensor details
input_tensor = lib.TfLiteInterpreterGetInputTensor(interpreter, 0)
output_tensor = lib.TfLiteInterpreterGetOutputTensor(interpreter, 0)

# def preprocess_audio(file_path, img_size):
#     y, sr = librosa.load(file_path, sr=None)
#     y = librosa.util.normalize(y)
#     D = librosa.stft(y, n_fft=2048, hop_length=512)
#     D_dB = librosa.amplitude_to_db(np.abs(D), ref=np.max)

#     # Rescale the spectrogram to the target img_size
#     # zoom_factors = [img_size[0] / D_dB.shape[0], img_size[1] / D_dB.shape[1]]
#     # D_dB_resized = zoom(D_dB, zoom_factors).astype(np.float32)

#     # Resize using TensorFlow
#     # D_dB_resized = tf.image.resize(D_dB[..., np.newaxis], img_size).numpy()
#     # D_dB_resized = np.squeeze(D_dB_resized, axis=-1).astype(np.float32)

#     # Convert the spectrogram to an image
#     D_dB_img = Image.fromarray(D_dB)

#     # Resize the image using PIL with LANCZOS resampling
#     D_dB_resized = D_dB_img.resize(img_size, Image.Resampling.LANCZOS)

#     # Convert back to NumPy array
#     D_dB_resized = np.array(D_dB_resized).astype(np.float32)

#     return D_dB_resized

def preprocess_audio(file_path, img_size):
    y, sr = librosa.load(file_path, sr=None)
    y = librosa.util.normalize(y)
    D = librosa.stft(y, n_fft=2048, hop_length=512)
    D_dB = librosa.amplitude_to_db(np.abs(D), ref=np.max)

    # Calculate zoom factors for resizing
    zoom_factors = [img_size[0] / D_dB.shape[0], img_size[1] / D_dB.shape[1]]
    D_dB_resized = zoom(D_dB, zoom_factors, order=3)  # order=3 for cubic interpolation

    # Add channel dimension to match the original function's output
    D_dB_resized = D_dB_resized[..., np.newaxis]

    return D_dB_resized

def predict(file_path, img_size=(64, 64)):
    input_data = preprocess_audio(file_path, img_size)
    input_data = np.expand_dims(input_data, axis=0).astype(np.float32)

    # Set the tensor to point to the input data to be inferred
    lib.TfLiteTensorCopyFromBuffer(input_tensor, input_data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), ctypes.c_size_t(input_data.nbytes))

    # Run inference
    lib.TfLiteInterpreterInvoke(interpreter)

    # Extract output data
    output_size = 1
    output_data = np.empty(output_size, dtype=np.float32)
    lib.TfLiteTensorCopyToBuffer(output_tensor, output_data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)), ctypes.c_size_t(output_data.nbytes))

    return output_data

def process_folder(folder_path, img_size=IMG_SIZE):
    correct_predictions = 0
    total_files = 0
    results = []

    for file_name in os.listdir(folder_path):
        if file_name.endswith('.wav'):
            file_path = os.path.join(folder_path, file_name)
            prediction = predict(file_path, img_size)
            prediction_label = 'Cry' if prediction > 0.5 else 'Not Cry'
            results.append((file_name, prediction_label))
            ground_truth = 'Cry' if '_cry.wav' in file_name else 'Not Cry'

            if prediction_label == ground_truth:
                correct_predictions += 1

            total_files += 1

    accuracy = (correct_predictions / total_files) * 100 if total_files > 0 else 0

    return results, accuracy

folder_path = '{0}/Test'.format(AUDIO_PATH)
predictions, accuracy = process_folder(folder_path)

for file_name, prediction_label in predictions:
    print(f"File: {file_name}, Prediction: {prediction_label}")

print(f"Prediction Accuracy: {accuracy:.2f}%")

# Clean up
lib.TfLiteInterpreterDelete(interpreter)
lib.TfLiteInterpreterOptionsDelete(options)
lib.TfLiteModelDelete(model)

print("All operations completed successfully.")


File: P19_612_notcry.wav, Prediction: Not Cry
File: P26_829_cry.wav, Prediction: Cry
File: P29_2405_cry.wav, Prediction: Cry
File: P29_62_cry.wav, Prediction: Cry
File: P26_7_cry.wav, Prediction: Cry
File: P36_14_notcry.wav, Prediction: Not Cry
File: P29_35_cry.wav, Prediction: Cry
File: P29_1714_cry.wav, Prediction: Cry
File: P29_724_cry.wav, Prediction: Cry
File: P26_9_cry.wav, Prediction: Cry
File: P29_348_cry.wav, Prediction: Cry
File: P20_388_cry.wav, Prediction: Not Cry
File: P26_824_cry.wav, Prediction: Cry
File: P29_773_cry.wav, Prediction: Cry
File: P29_1564_cry.wav, Prediction: Not Cry
File: P19_607_notcry.wav, Prediction: Not Cry
File: P29_1873_cry.wav, Prediction: Cry
File: P20_895_cry.wav, Prediction: Cry
File: P20_802_cry.wav, Prediction: Cry
File: P29_2090_cry.wav, Prediction: Cry
File: P20_919_cry.wav, Prediction: Not Cry
File: P17_41_cry.wav, Prediction: Cry
File: P29_1452_cry.wav, Prediction: Cry
File: P36_52_notcry.wav, Prediction: Not Cry
File: P29_11_cry.wav, Predi