<a href="https://colab.research.google.com/github/shaja-asm/cry-detection/blob/main/tf_lite_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import librosa
import librosa.display
# import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, LSTM
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from sklearn.utils.class_weight import compute_class_weight
from tensorflow.keras.utils import Sequence
import datetime
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, TensorBoard, EarlyStopping
from tensorflow.keras.optimizers import Adam
from scipy.ndimage import zoom
import ctypes
from kerastuner.tuners import RandomSearch

# gpus = tf.config.experimental.list_physical_devices('GPU')
# if gpus:
#     try:
#         for gpu in gpus:
#             tf.config.experimental.set_memory_growth(gpu, True)
#     except RuntimeError as e:
#         print(e)
# print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

In [2]:
AUDIO_PATH = 'CryCorpusFinal'
CRY_FOLDER = os.path.join(AUDIO_PATH, 'cry/augmented')
NOTCRY_FOLDER = os.path.join(AUDIO_PATH, 'notcry')
IMG_SIZE = (128, 128)
BATCH_SIZE = 32
EPOCHS = 25
MODEL = 'cnn' # Choice: 'cnn' or 'lstm'

In [3]:
def load_audio_files(folder):
    """Return the paths of the ``.wav`` entries found directly in *folder*.

    Each name yielded by ``os.listdir`` that ends in ``'.wav'`` is joined onto
    *folder*; the listing order is preserved and no recursion is performed.
    """
    return [
        os.path.join(folder, entry)
        for entry in os.listdir(folder)
        if entry.endswith('.wav')
    ]

def compute_spectrogram(y, sr, n_fft=2048, hop_length=512):
    """Return the dB-scaled STFT magnitude of the signal *y*.

    The magnitude of ``librosa.stft`` is converted with
    ``librosa.amplitude_to_db`` using the array's own maximum (``np.max``) as
    the reference. *sr* is accepted but not used by the computation.
    """
    magnitude = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
    return librosa.amplitude_to_db(magnitude, ref=np.max)

def save_spectrogram_to_disk(D_dB, save_path):
    """Write the array *D_dB* to *save_path* with ``np.save``.

    Missing parent directories of *save_path* are created first.

    Args:
        D_dB: Array to persist.
        save_path: Destination file path; may be a bare filename.
    """
    parent = os.path.dirname(save_path)
    # A bare filename has an empty directory part, and os.makedirs('') raises.
    if parent:
        # exist_ok avoids the race between a separate exists() check and the
        # directory creation.
        os.makedirs(parent, exist_ok=True)
    np.save(save_path, D_dB)


In [4]:
cry_files = load_audio_files(CRY_FOLDER)
notcry_files = load_audio_files(NOTCRY_FOLDER)

# `data` collects the paths of the cached .npy spectrograms and `labels` the
# matching class ids (1 = cry, 0 = not cry), cry files first.
data = []
labels = []

spectrogram_dir = '{0}/spectrograms'.format(AUDIO_PATH)

# One loop handles both classes; only the filename prefix and label differ.
for files, prefix, label in ((cry_files, 'cry', 1), (notcry_files, 'notcry', 0)):
    for idx, file in enumerate(files):
        y, sr = librosa.load(file, sr=22050)
        y = librosa.util.normalize(y)
        D_dB = compute_spectrogram(y, sr)
        save_path = os.path.join(spectrogram_dir, f'{prefix}_{idx}.npy')
        save_spectrogram_to_disk(D_dB, save_path)
        data.append(save_path)
        labels.append(label)

data = np.array(data)
labels = np.array(labels)



In [5]:
# Split data
# Hold out 20% of the (spectrogram path, label) pairs for validation with a
# fixed random_state. No `stratify` argument is passed, so the class ratio of
# the two subsets is not forced to match.
X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.2, random_state=42)

# Improved Data Generator
class OnTheFlyDataGenerator(tf.keras.utils.Sequence):
    """Batch generator that loads saved spectrograms and resizes them on demand.

    Each sample is read from a ``.npy`` file, given a trailing channel axis,
    resized to ``img_size`` with cubic interpolation and optionally augmented.

    Args:
        file_paths: Indexable collection of ``.npy`` spectrogram paths.
        labels: Indexable collection of integer labels aligned with
            ``file_paths``.
        batch_size: Number of samples per batch.
        img_size: Target ``(rows, cols)`` of every resized spectrogram.
        shuffle: Reshuffle the sample order on construction and after each
            epoch.
        augment: Apply random flips and additive uniform noise per sample.
        is_lstm: Return batches without the channel axis, reshaped to
            ``(batch, img_size[1], img_size[0])``.
    """

    def __init__(self, file_paths, labels, batch_size, img_size, shuffle=True, augment=False, is_lstm=False):
        # Newer Keras releases expect Sequence subclasses to run the base
        # initialiser; with a base class that defines none this is a no-op.
        super().__init__()
        self.file_paths = file_paths
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.shuffle = shuffle
        self.augment = augment
        self.is_lstm = is_lstm
        self.indices = np.arange(len(self.file_paths))
        self.on_epoch_end()

    def __len__(self):
        """Return the number of full batches; a trailing partial batch is dropped."""
        return int(np.floor(len(self.file_paths) / self.batch_size))

    def __getitem__(self, index):
        """Return the ``(X, y)`` arrays for batch number *index*."""
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_file_paths = [self.file_paths[i] for i in batch_indices]
        batch_labels = [self.labels[i] for i in batch_indices]

        X, y = self.__data_generation(batch_file_paths, batch_labels)
        return X, y

    def on_epoch_end(self):
        """Reshuffle the sample order in place when shuffling is enabled."""
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __data_generation(self, batch_file_paths, batch_labels):
        """Load, resize and optionally augment one batch of spectrograms."""
        X = np.empty((len(batch_file_paths), *self.img_size, 1), dtype=np.float32)
        y = np.empty((len(batch_file_paths),), dtype=int)

        for i, file_path in enumerate(batch_file_paths):
            # Load data from file
            D_dB = np.load(file_path)
            D_dB = D_dB[..., np.newaxis]  # Add channel dimension

            # Resizing: scale both spatial axes to img_size and leave the
            # channel axis untouched (factor 1).
            zoom_factors = [self.img_size[0] / D_dB.shape[0], self.img_size[1] / D_dB.shape[1], 1]
            D_dB = zoom(D_dB, zoom_factors, order=3)  # Cubic interpolation

            # Augmentation: each transform is applied independently with
            # probability 0.5.
            if self.augment:
                if np.random.rand() > 0.5:
                    D_dB = np.flip(D_dB, axis=1)  # Flip left-right
                if np.random.rand() > 0.5:
                    D_dB = np.flip(D_dB, axis=0)  # Flip up-down
                if np.random.rand() > 0.5:
                    D_dB = D_dB + np.random.uniform(-0.2, 0.2, size=D_dB.shape)  # Additive noise

            X[i,] = D_dB
            y[i] = batch_labels[i]

        if self.is_lstm:
            # Drop the channel axis for recurrent models.
            # NOTE(review): this is a reshape, not a transpose, so for a
            # non-square img_size it reinterprets memory rather than swapping
            # the two axes - confirm that is intended.
            X = X.reshape(len(batch_file_paths), self.img_size[1], self.img_size[0])

        return X, y

# Training batches are shuffled and augmented; validation batches keep a fixed
# order and are not augmented.
# NOTE(review): is_lstm is left at its default (False) for both generators even
# though MODEL may be set to 'lstm' - confirm the LSTM path gets the input
# shape it expects.
train_generator = OnTheFlyDataGenerator(X_train, y_train, BATCH_SIZE, IMG_SIZE, shuffle=True, augment=True)
val_generator = OnTheFlyDataGenerator(X_val, y_val, BATCH_SIZE, IMG_SIZE, shuffle=False, augment=False)

# l2 regularization
l2_regularizer = tf.keras.regularizers.l2(0.001)

    # Third Conv Block
    model.add(Conv2D(hp.Int('filters_3', min_value=128, max_value=512, step=128), (3, 3), activation='relu', kernel_regularizer=l2_regularizer))
    model.add(BatchNormalization())
    model.add(MaxPooling2D((2, 2)))
    model.add(Dropout(hp.Float('dropout_3', min_value=0.2, max_value=0.5, step=0.1)))

# Compile and train. `model` must already exist at this point; its
# construction is not part of the code visible here.
optimizer = Adam(learning_rate=1e-4)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
# Each run logs to its own timestamped TensorBoard directory.
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = TensorBoard(log_dir=log_dir, histogram_freq=1, profile_batch='500,520')
# All three callbacks below monitor the validation loss.
checkpoint_callback = ModelCheckpoint('cry_detection_model.keras', monitor='val_loss', save_best_only=True, mode='min')
lr_callback = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=5, min_lr=1e-6)
early_stopping_callback = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# Both classes are weighted equally; the mapping is passed to fit() as is.
class_weights = {0: 1., 1: 1.}

history = model.fit(
    train_generator,
    epochs=EPOCHS,
    validation_data=val_generator,
    class_weight=class_weights,
    callbacks=[tensorboard_callback, checkpoint_callback, lr_callback, early_stopping_callback]
)


Trial 10 Complete [00h 07m 33s]
val_accuracy: 0.8770833611488342

Best val_accuracy So Far: 0.9416666626930237
Total elapsed time: 01h 08m 15s


2024-10-22 14:16:43.943185: I tensorflow/tsl/profiler/lib/profiler_session.cc:104] Profiler session initializing.
2024-10-22 14:16:43.943712: I tensorflow/tsl/profiler/lib/profiler_session.cc:119] Profiler session started.
2024-10-22 14:16:44.064202: I tensorflow/tsl/profiler/lib/profiler_session.cc:131] Profiler session tear down.


Epoch 1/50


INFO:tensorflow:Assets written to: cnn_best_model/assets


Epoch 2/50


INFO:tensorflow:Assets written to: cnn_best_model/assets


Epoch 3/50


INFO:tensorflow:Assets written to: cnn_best_model/assets


Epoch 4/50
Epoch 5/50
Epoch 6/50


INFO:tensorflow:Assets written to: cnn_best_model/assets


Epoch 7/50
Epoch 8/50
Epoch 9/50
 3/62 [>.............................] - ETA: 40s - loss: 0.1598 - accuracy: 0.9583

2024-10-22 14:22:50.779832: I tensorflow/tsl/profiler/lib/profiler_session.cc:104] Profiler session initializing.
2024-10-22 14:22:50.780012: I tensorflow/tsl/profiler/lib/profiler_session.cc:119] Profiler session started.




2024-10-22 14:23:04.787538: I tensorflow/tsl/profiler/lib/profiler_session.cc:70] Profiler session collecting data.
2024-10-22 14:23:04.809324: I tensorflow/tsl/profiler/lib/profiler_session.cc:131] Profiler session tear down.
2024-10-22 14:23:04.820398: I tensorflow/tsl/profiler/rpc/client/save_profile.cc:144] Collecting XSpace to repository: logs/fit/cnn_best_20241022-141643/plugins/profile/2024_10_22_14_23_04/TEC-LAP-47.xplane.pb


Epoch 10/50


INFO:tensorflow:Assets written to: cnn_best_model/assets


Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50


INFO:tensorflow:Assets written to: cnn_best_model/assets


Epoch 16/50


INFO:tensorflow:Assets written to: cnn_best_model/assets


Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50


INFO:tensorflow:Assets written to: cnn_best_model/assets


Epoch 28/50


INFO:tensorflow:Assets written to: cnn_best_model/assets


Epoch 29/50
Epoch 30/50


INFO:tensorflow:Assets written to: cnn_best_model/assets


Epoch 31/50


INFO:tensorflow:Assets written to: cnn_best_model/assets


Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50


In [6]:
def evaluate_and_save_model(model, model_name, val_generator, is_lstm=False):
    """Score *model* on *val_generator*, print the metrics and save the model.

    Predictions and true labels are gathered in two separate passes over the
    generator, so the generator must return the same batches in the same order
    both times (i.e. it must not shuffle).

    Args:
        model: Object exposing ``predict`` and ``save``.
        model_name: Label used in the printed messages and as the prefix of
            the saved ``<model_name>_cry_detection_model.keras`` file.
        val_generator: Batch sequence supporting ``len()`` and integer
            indexing, each item being an ``(X, y)`` pair.
        is_lstm: Unused; kept so existing call sites keep working.
    """
    # Flatten the predictions so they line up element-wise with the 1-D label
    # vector built below.
    y_prob = np.asarray(model.predict(val_generator)).ravel()
    y_pred = (y_prob > 0.5).astype(int)

    # Index the batches explicitly rather than iterating the generator:
    # iteration only terminates if the base class supplies a bounded __iter__.
    y_true = []
    for batch_idx in range(len(val_generator)):
        _, batch_labels = val_generator[batch_idx]
        y_true.extend(batch_labels)
    y_true = np.array(y_true)

    # Calculate accuracy and F1 score
    acc = accuracy_score(y_true, y_pred)
    f1 = f1_score(y_true, y_pred)

    # Print the metrics
    print(f'Accuracy for {model_name}: {acc}')
    print(f'F1 Score for {model_name}: {f1}')

    # Save the model
    model.save(f'{model_name}_cry_detection_model.keras')
    print(f'{model_name} model saved as {model_name}_cry_detection_model.keras')


# Model selection, training, and evaluation
# Only the two supported architectures are evaluated and saved; any other
# MODEL value falls through without doing anything.
if MODEL in ('cnn', 'lstm'):
    evaluate_and_save_model(model, MODEL, val_generator, is_lstm=(MODEL == 'lstm'))


Accuracy for cnn: 0.9625
F1 Score for cnn: 0.9660377358490566
cnn model saved as cnn_cry_detection_model.keras


In [7]:
import pathlib

# Create directory for TFLite models
tflite_models_dir = pathlib.Path("tflite_models")
tflite_models_dir.mkdir(exist_ok=True, parents=True)

# Convert to TFLite, starting from the in-memory Keras model
converter = tf.lite.TFLiteConverter.from_keras_model(model)

# Allow Select TF Ops for both CNN and LSTM models
converter.target_spec.supported_ops = [tf.lite.OpsSet.TFLITE_BUILTINS, tf.lite.OpsSet.SELECT_TF_OPS]

# Disable experimental lowering of tensor list ops
# (private converter attribute - may change between TensorFlow releases)
converter._experimental_lower_tensor_list_ops = False

# Convert the model
tflite_model = converter.convert()

# Save the unoptimized model
tflite_model_file = tflite_models_dir / "cry_detection_model.tflite"
tflite_model_file.write_bytes(tflite_model)

# Apply optimizations and convert again with the same converter object
# NOTE(review): no float16 supported_types is configured here, so despite the
# `fp16` variable names this is presumably not a float16 model - confirm.
converter.optimizations = [tf.lite.Optimize.DEFAULT]

tflite_fp16_model = converter.convert()
tflite_model_fp16_file = tflite_models_dir / "cry_detection_model_quant.tflite"
tflite_model_fp16_file.write_bytes(tflite_fp16_model)

print("TFLite conversion successful!")



INFO:tensorflow:Assets written to: /tmp/tmptap_gjwt/assets


INFO:tensorflow:Assets written to: /tmp/tmptap_gjwt/assets
2024-10-22 14:57:31.755111: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2024-10-22 14:57:31.756839: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2024-10-22 14:57:31.785148: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmptap_gjwt
2024-10-22 14:57:31.789728: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2024-10-22 14:57:31.789777: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: /tmp/tmptap_gjwt
2024-10-22 14:57:31.821724: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:375] MLIR V1 optimization pass is not enabled
2024-10-22 14:57:31.828141: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2024-10-22 14:57:32.216720: I tensorflow/cc/saved_model/loader.cc:215] Running initializatio

INFO:tensorflow:Assets written to: /tmp/tmpdxelk7pz/assets


INFO:tensorflow:Assets written to: /tmp/tmpdxelk7pz/assets


TFLite conversion successful!


2024-10-22 14:57:37.433301: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:364] Ignored output_format.
2024-10-22 14:57:37.433375: W tensorflow/compiler/mlir/lite/python/tf_tfl_flatbuffer_helpers.cc:367] Ignored drop_control_dependency.
2024-10-22 14:57:37.433621: I tensorflow/cc/saved_model/reader.cc:45] Reading SavedModel from: /tmp/tmpdxelk7pz
2024-10-22 14:57:37.437253: I tensorflow/cc/saved_model/reader.cc:91] Reading meta graph with tags { serve }
2024-10-22 14:57:37.437275: I tensorflow/cc/saved_model/reader.cc:132] Reading SavedModel debug info (if present) from: /tmp/tmpdxelk7pz
2024-10-22 14:57:37.448634: I tensorflow/cc/saved_model/loader.cc:231] Restoring SavedModel bundle.
2024-10-22 14:57:37.838487: I tensorflow/cc/saved_model/loader.cc:215] Running initialization op on SavedModel bundle at path: /tmp/tmpdxelk7pz
2024-10-22 14:57:37.868745: I tensorflow/cc/saved_model/loader.cc:314] SavedModel load for tags { serve }; Status: success: OK. Took 435125 

In [8]:
# Initialize the TFLite interpreter from the optimized model file written by
# the conversion cell
interpreter = tf.lite.Interpreter(model_path="tflite_models/cry_detection_model_quant.tflite")
interpreter.allocate_tensors()

# Get input and output tensors (lists of per-tensor detail dicts; the helper
# functions below read the 'index' entry of the first element of each)
input_details = interpreter.get_input_details()
output_details = interpreter.get_output_details()

def preprocess_audio(file_path, img_size, is_lstm=False, sr=22050):
    """Turn an audio file into a resized dB spectrogram ready for the model.

    Args:
        file_path: Path of the audio file to load.
        img_size: Target ``(rows, cols)`` of the resized spectrogram.
        is_lstm: When False a trailing channel axis is appended; when True
            the 2-D array is returned as is.
        sr: Sample rate the audio is resampled to on load. Defaults to 22050,
            the rate used when the training spectrograms were generated.

    Returns:
        The resized spectrogram array.
    """
    y, sr = librosa.load(file_path, sr=sr)
    y = librosa.util.normalize(y)
    D = librosa.stft(y, n_fft=2048, hop_length=512)
    D_dB = librosa.amplitude_to_db(np.abs(D), ref=np.max)

    # Calculate zoom factors for resizing
    zoom_factors = [img_size[0] / D_dB.shape[0], img_size[1] / D_dB.shape[1]]
    D_dB_resized = zoom(D_dB, zoom_factors, order=3)  # Cubic interpolation

    # Add channel dimension unless the model takes 2-D (sequence) input
    if not is_lstm:
        D_dB_resized = D_dB_resized[..., np.newaxis]

    return D_dB_resized


def predict(file_path, img_size=IMG_SIZE, is_lstm=False):
    """Run the TFLite interpreter on one audio file and return its raw output.

    Args:
        file_path: Path of the audio file to classify.
        img_size: Spectrogram size handed to ``preprocess_audio``.
        is_lstm: Forwarded to ``preprocess_audio``.

    Returns:
        The array read from the interpreter's first output tensor.
    """
    input_data = preprocess_audio(file_path, img_size, is_lstm)
    # Prepend the batch axis and cast to float32 before handing to the interpreter.
    input_data = np.expand_dims(input_data, axis=0).astype(np.float32)

    # Set the tensor to point to the input data to be inferred
    interpreter.set_tensor(input_details[0]['index'], input_data)

    # Run inference
    interpreter.invoke()

    output_data = interpreter.get_tensor(output_details[0]['index'])

    return output_data


def process_folder(folder_path, img_size=IMG_SIZE, is_lstm=False):
    """Classify every ``.wav`` file in *folder_path* and compute the accuracy.

    The ground truth is taken from the file name: names containing
    ``'_cry.wav'`` count as 'Cry', everything else as 'Not Cry'.

    Args:
        folder_path: Directory holding the test ``.wav`` files.
        img_size: Spectrogram size handed to ``predict``.
        is_lstm: Forwarded to ``predict``.

    Returns:
        ``(results, accuracy)``: a list of ``(file_name, predicted_label)``
        pairs and the accuracy in percent (0 when no file was processed).
    """
    correct_predictions = 0
    total_files = 0
    results = []

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.wav'):
            continue

        file_path = os.path.join(folder_path, file_name)
        prediction = predict(file_path, img_size, is_lstm)
        # Reduce the model output to a plain float before thresholding.
        score = float(np.ravel(prediction)[0])
        prediction_label = 'Cry' if score > 0.5 else 'Not Cry'
        results.append((file_name, prediction_label))
        ground_truth = 'Cry' if '_cry.wav' in file_name else 'Not Cry'

        if prediction_label == ground_truth:
            correct_predictions += 1

        total_files += 1

    accuracy = (correct_predictions / total_files) * 100 if total_files > 0 else 0

    return results, accuracy

# Evaluate the TFLite model on the held-out Test folder and report the
# per-file predictions followed by the overall accuracy.
folder_path = '{0}/Test'.format(AUDIO_PATH)
predictions, accuracy = process_folder(folder_path)

for file_name, prediction_label in predictions:
    print(f"File: {file_name}, Prediction: {prediction_label}")

print(f"Prediction Accuracy: {accuracy:.2f}%")


In [12]:
# Load the TensorFlow Lite C library and drive it directly through ctypes.
lib = ctypes.cdll.LoadLibrary('{0}/libtensorflowlite_c.so'.format(AUDIO_PATH))

# Define types for the C API functions. Handles are declared as pointer types
# so they are not truncated to a C int when returned or passed back in.
lib.TfLiteModelCreate.restype = ctypes.POINTER(ctypes.c_void_p)
lib.TfLiteInterpreterCreate.restype = ctypes.POINTER(ctypes.c_void_p)
lib.TfLiteInterpreterOptionsCreate.restype = ctypes.POINTER(ctypes.c_void_p)
lib.TfLiteInterpreterOptionsSetNumThreads.argtypes = [ctypes.POINTER(ctypes.c_void_p), ctypes.c_int]
lib.TfLiteInterpreterOptionsDelete.argtypes = [ctypes.POINTER(ctypes.c_void_p)]
lib.TfLiteInterpreterDelete.argtypes = [ctypes.POINTER(ctypes.c_void_p)]
lib.TfLiteModelDelete.argtypes = [ctypes.POINTER(ctypes.c_void_p)]
lib.TfLiteInterpreterGetInputTensor.restype = ctypes.POINTER(ctypes.c_void_p)
lib.TfLiteInterpreterGetOutputTensor.restype = ctypes.POINTER(ctypes.c_void_p)

model_path = b"tflite_models/cry_detection_model_quant.tflite"
with open(model_path, 'rb') as f:
    model_data = f.read()

# NOTE(review): `model_data` is kept as a module-level name; presumably the C
# library reads from this buffer for as long as the model exists - confirm
# against the C API documentation before letting it go out of scope.
model = lib.TfLiteModelCreate(ctypes.c_char_p(model_data), ctypes.c_size_t(len(model_data)))
if not model:
    # A NULL handle would otherwise only surface later as a native crash.
    raise RuntimeError("TfLiteModelCreate failed for tflite_models/cry_detection_model_quant.tflite")

# Create interpreter options and set number of threads
options = lib.TfLiteInterpreterOptionsCreate()
lib.TfLiteInterpreterOptionsSetNumThreads(options, 2)

# Create the interpreter with the custom options
interpreter = lib.TfLiteInterpreterCreate(model, options)
if not interpreter:
    raise RuntimeError("TfLiteInterpreterCreate returned NULL")

# Allocate tensors; a non-zero status means the call did not succeed.
status = lib.TfLiteInterpreterAllocateTensors(interpreter)
if status != 0:
    raise RuntimeError(f"TfLiteInterpreterAllocateTensors failed with status {status}")

# Get input and output tensor details
input_tensor = lib.TfLiteInterpreterGetInputTensor(interpreter, 0)
output_tensor = lib.TfLiteInterpreterGetOutputTensor(interpreter, 0)

# def preprocess_audio(file_path, img_size):
#     y, sr = librosa.load(file_path, sr=None)
#     y = librosa.util.normalize(y)
#     D = librosa.stft(y, n_fft=2048, hop_length=512)
#     D_dB = librosa.amplitude_to_db(np.abs(D), ref=np.max)

#     # Rescale the spectrogram to the target img_size
#     # zoom_factors = [img_size[0] / D_dB.shape[0], img_size[1] / D_dB.shape[1]]
#     # D_dB_resized = zoom(D_dB, zoom_factors).astype(np.float32)

#     # Resize using TensorFlow
#     # D_dB_resized = tf.image.resize(D_dB[..., np.newaxis], img_size).numpy()
#     # D_dB_resized = np.squeeze(D_dB_resized, axis=-1).astype(np.float32)

#     # Convert the spectrogram to an image
#     D_dB_img = Image.fromarray(D_dB)

#     # Resize the image using PIL with LANCZOS resampling
#     D_dB_resized = D_dB_img.resize(img_size, Image.Resampling.LANCZOS)

#     # Convert back to NumPy array
#     D_dB_resized = np.array(D_dB_resized).astype(np.float32)

#     return D_dB_resized

def preprocess_audio(file_path, img_size, is_lstm=False, sr=22050):
    """Turn an audio file into a resized dB spectrogram ready for the model.

    Args:
        file_path: Path of the audio file to load.
        img_size: Target ``(rows, cols)`` of the resized spectrogram.
        is_lstm: When False a trailing channel axis is appended; when True
            the 2-D array is returned as is.
        sr: Sample rate the audio is resampled to on load. Defaults to 22050,
            the rate used when the training spectrograms were generated.

    Returns:
        The resized spectrogram array.
    """
    y, sr = librosa.load(file_path, sr=sr)
    y = librosa.util.normalize(y)
    D = librosa.stft(y, n_fft=2048, hop_length=512)
    D_dB = librosa.amplitude_to_db(np.abs(D), ref=np.max)

    # Calculate zoom factors for resizing
    zoom_factors = [img_size[0] / D_dB.shape[0], img_size[1] / D_dB.shape[1]]
    D_dB_resized = zoom(D_dB, zoom_factors, order=3)  # Cubic interpolation

    # Add channel dimension for CNN, keep the 2-D array for LSTM
    if not is_lstm:
        D_dB_resized = D_dB_resized[..., np.newaxis]

    return D_dB_resized


def predict(file_path, img_size=IMG_SIZE, is_lstm=False):
    """Run one audio file through the TFLite C interpreter.

    Args:
        file_path: Path of the audio file to classify.
        img_size: Spectrogram size handed to ``preprocess_audio``.
        is_lstm: Forwarded to ``preprocess_audio``.

    Returns:
        A float32 array of length 1 holding the model output.

    Raises:
        RuntimeError: If a C API call reports a non-zero status.
    """
    input_data = preprocess_audio(file_path, img_size, is_lstm)
    # The C API copies raw bytes, so the buffer must be contiguous float32.
    input_data = np.ascontiguousarray(np.expand_dims(input_data, axis=0), dtype=np.float32)

    # Copy the input data into the interpreter's input tensor
    status = lib.TfLiteTensorCopyFromBuffer(
        input_tensor,
        input_data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_size_t(input_data.nbytes),
    )
    if status != 0:
        raise RuntimeError(f"TfLiteTensorCopyFromBuffer failed with status {status}")

    # Run inference
    status = lib.TfLiteInterpreterInvoke(interpreter)
    if status != 0:
        raise RuntimeError(f"TfLiteInterpreterInvoke failed with status {status}")

    # Extract output data (a single float)
    output_size = 1
    output_data = np.empty(output_size, dtype=np.float32)
    status = lib.TfLiteTensorCopyToBuffer(
        output_tensor,
        output_data.ctypes.data_as(ctypes.POINTER(ctypes.c_float)),
        ctypes.c_size_t(output_data.nbytes),
    )
    if status != 0:
        raise RuntimeError(f"TfLiteTensorCopyToBuffer failed with status {status}")

    return output_data


def process_folder(folder_path, img_size=IMG_SIZE, is_lstm=False):
    """Classify every ``.wav`` file in *folder_path* and compute summary metrics.

    The ground truth is taken from the file name: names containing
    ``'_cry.wav'`` count as 'Cry', everything else as 'Not Cry'.

    Args:
        folder_path: Directory holding the test ``.wav`` files.
        img_size: Spectrogram size handed to ``predict``.
        is_lstm: Forwarded to ``predict``.

    Returns:
        ``(results, accuracy, f1, false_negative_percentage)`` where
        ``results`` is a list of ``(file_name, predicted_label)`` pairs,
        ``accuracy`` is in percent, ``f1`` is the F1 score of the 'Cry' class
        and ``false_negative_percentage`` is the share of 'Cry' files that
        were predicted 'Not Cry', in percent. Every metric is 0 when its
        denominator is 0.
    """
    results = []

    # Confusion-matrix counters with 'Cry' as the positive class
    true_positives = 0
    false_positives = 0
    false_negatives = 0
    true_negatives = 0

    for file_name in os.listdir(folder_path):
        if not file_name.endswith('.wav'):
            continue

        file_path = os.path.join(folder_path, file_name)
        prediction = predict(file_path, img_size, is_lstm)
        predicted_cry = float(np.ravel(prediction)[0]) > 0.5
        actual_cry = '_cry.wav' in file_name
        results.append((file_name, 'Cry' if predicted_cry else 'Not Cry'))

        if predicted_cry and actual_cry:
            true_positives += 1
        elif predicted_cry:
            false_positives += 1
        elif actual_cry:
            false_negatives += 1
        else:
            true_negatives += 1

    total_files = true_positives + false_positives + false_negatives + true_negatives
    correct_predictions = true_positives + true_negatives
    accuracy = (correct_predictions / total_files) * 100 if total_files > 0 else 0

    # Calculate precision, recall, F1 score
    predicted_positives = true_positives + false_positives
    actual_positives = true_positives + false_negatives
    precision = true_positives / predicted_positives if predicted_positives > 0 else 0
    recall = true_positives / actual_positives if actual_positives > 0 else 0
    f1 = (2 * precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
    false_negative_percentage = (false_negatives / actual_positives) * 100 if actual_positives > 0 else 0

    return results, accuracy, f1, false_negative_percentage


folder_path = '{0}/Test'.format(AUDIO_PATH)
# `f1` is used rather than `f1_score` so the sklearn function imported at the
# top of the file is not overwritten.
predictions, accuracy, f1, false_negative_percentage = process_folder(folder_path)

for file_name, prediction_label in predictions:
    print(f"File: {file_name}, Prediction: {prediction_label}")

print(f"Prediction Accuracy: {accuracy:.2f}%")
print(f"F1 Score: {f1:.2f}")
print(f"False Negative Percentage: {false_negative_percentage:.2f}%")

# Clean up: release the native handles created through the C API
# (interpreter first, then the options and the model it was built from).
lib.TfLiteInterpreterDelete(interpreter)
lib.TfLiteInterpreterOptionsDelete(options)
lib.TfLiteModelDelete(model)

print("All operations completed successfully.")


In [None]:
import os
import random
import librosa
import soundfile as sf
import numpy as np

def _fit_length(clip, length):
    """Loop or trim *clip* so that it is exactly *length* samples long."""
    if len(clip) < length:
        clip = np.tile(clip, int(np.ceil(length / len(clip))))
    return clip[:length]


def augment_data(input_folder, output_folder, ogg_files):
    """Mix background-noise clips into the ``.wav`` files of *input_folder*.

    The input files are split into one contiguous group per noise clip; every
    file in a group is mixed with that group's clip at a random attenuation
    between 0 and -20 dB, peak-normalised and written to *output_folder* as
    ``<input stem>_<clip stem>_augmented.wav``.

    Args:
        input_folder: Directory containing the source ``.wav`` files.
        output_folder: Directory for the mixed files; created if missing.
        ogg_files: Paths of the background-noise recordings.

    Raises:
        ValueError: If *ogg_files* is empty.
    """
    if not ogg_files:
        raise ValueError("ogg_files must contain at least one background clip")

    os.makedirs(output_folder, exist_ok=True)

    target_sr = 22050
    clip_length = 5 * target_sr

    # Resample ogg files to 22050 Hz, force them to 5 seconds and normalise
    # once up front (the normalisation does not depend on the input file).
    ogg_clips = []
    for ogg_file in ogg_files:
        y, sr = librosa.load(ogg_file, sr=target_sr)
        y = librosa.util.normalize(_fit_length(y, clip_length))
        ogg_clips.append((y, os.path.basename(ogg_file).split('.')[0]))

    # Sorted so the file-to-clip assignment is the same on every run.
    input_files = sorted(f for f in os.listdir(input_folder) if f.endswith('.wav'))
    num_groups = len(ogg_clips)
    files_per_group = len(input_files) // num_groups

    # Split input files into groups
    for i, (ogg_clip_data, ogg_clip_name) in enumerate(ogg_clips):
        start = i * files_per_group
        # The last group also takes the remainder so no input file is skipped.
        end = len(input_files) if i == num_groups - 1 else start + files_per_group

        for input_file in input_files[start:end]:
            input_path = os.path.join(input_folder, input_file)
            y, sr = librosa.load(input_path, sr=target_sr)

            # Randomly reduce gain of ogg clip
            gain_reduction = random.uniform(0, -20)
            # Loop the clip when the input is longer than it, so the two
            # arrays always have the same length.
            ogg_clip_adjusted = _fit_length(ogg_clip_data, len(y)) * (10 ** (gain_reduction / 20))

            # Mix the input file with the ogg clip
            mixed_audio = librosa.util.normalize(y + ogg_clip_adjusted)

            output_file = f"{os.path.splitext(input_file)[0]}_{ogg_clip_name}_augmented.wav"
            output_path = os.path.join(output_folder, output_file)
            sf.write(output_path, mixed_audio, sr)

# Augment the raw cry recordings with household background noise. The output
# folder is the same 'cry/augmented' directory that CRY_FOLDER points to.
input_folder = f'{AUDIO_PATH}/cry'
output_folder = f'{AUDIO_PATH}/cry/augmented'
# Background-noise recordings, one per group of input files.
ogg_files = [f'{AUDIO_PATH}/ac.ogg', f'{AUDIO_PATH}/dishwasher.ogg', f'{AUDIO_PATH}/fan.ogg', f'{AUDIO_PATH}/refridgerator.ogg', 
             f'{AUDIO_PATH}/tv.ogg',f'{AUDIO_PATH}/vaccum_cleaner.ogg']

augment_data(input_folder, output_folder, ogg_files)