<a href="https://colab.research.google.com/github/shaja-asm/cry-detection/blob/main/tf_lite_model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import os
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
from tensorflow.keras.utils import Sequence
from sklearn.model_selection import KFold
import datetime


gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        print(e)
print("Num GPUs Available: ", len(tf.config.experimental.list_physical_devices('GPU')))

Num GPUs Available:  1


In [2]:
AUDIO_PATH = '/content/drive/MyDrive/CryCorpusFinal'
CRY_FOLDER = os.path.join(AUDIO_PATH, 'cry')
NOTCRY_FOLDER = os.path.join(AUDIO_PATH, 'notcry')
IMG_SIZE = (64, 64)
BATCH_SIZE = 32
EPOCHS = 25

In [3]:
def load_audio_files(folder):
    files = []
    for filename in os.listdir(folder):
        if filename.endswith('.wav'):
            files.append(os.path.join(folder, filename))
    return files

def normalize_audio(y):
    return librosa.util.normalize(y)

def compute_spectrogram(y, sr, n_fft=2048, hop_length=512):
    D = librosa.stft(y, n_fft=n_fft, hop_length=hop_length)
    D_dB = librosa.amplitude_to_db(np.abs(D), ref=np.max)
    return D_dB

def spectrogram_to_image(D_dB):
    fig, ax = plt.subplots()
    librosa.display.specshow(D_dB, sr=sr, hop_length=512, x_axis='time', y_axis='log', ax=ax)
    ax.axis('off')
    fig.canvas.draw()
    img = np.frombuffer(fig.canvas.tostring_rgb(), dtype=np.uint8)
    img = img.reshape(fig.canvas.get_width_height()[::-1] + (3,))
    plt.close(fig)
    return img

In [4]:
def save_spectrogram_to_disk(D_dB, save_path):
    if not os.path.exists(os.path.dirname(save_path)):
        os.makedirs(os.path.dirname(save_path))
    np.save(save_path, D_dB)

cry_files = load_audio_files(CRY_FOLDER)
notcry_files = load_audio_files(NOTCRY_FOLDER)

data = []
labels = []

for idx, file in enumerate(cry_files):
    y, sr = librosa.load(file, sr=None)
    y = normalize_audio(y)
    D_dB = compute_spectrogram(y, sr)
    save_path = os.path.join('spectrograms', f'cry_{idx}.npy')
    save_spectrogram_to_disk(D_dB, save_path)
    data.append(save_path)
    labels.append(1)

for idx, file in enumerate(notcry_files):
    y, sr = librosa.load(file, sr=None)
    y = normalize_audio(y)
    D_dB = compute_spectrogram(y, sr)
    save_path = os.path.join('spectrograms', f'notcry_{idx}.npy')
    save_spectrogram_to_disk(D_dB, save_path)
    data.append(save_path)
    labels.append(0)

data = np.array(data)
labels = np.array(labels)



In [6]:
# Split the data into training and testing sets
X_train, X_val, y_train, y_val = train_test_split(data, labels, test_size=0.2, random_state=42)

# Custom data generator
class OnTheFlyDataGenerator(tf.keras.utils.Sequence):
    def __init__(self, file_paths, labels, batch_size, img_size, shuffle=True):
        self.file_paths = file_paths
        self.labels = labels
        self.batch_size = batch_size
        self.img_size = img_size
        self.shuffle = shuffle
        self.indices = np.arange(len(self.file_paths))
        self.on_epoch_end()

    def __len__(self):
        return int(np.ceil(len(self.file_paths) / self.batch_size))

    def __getitem__(self, index):
        batch_indices = self.indices[index * self.batch_size:(index + 1) * self.batch_size]
        batch_file_paths = [self.file_paths[i] for i in batch_indices]
        batch_labels = [self.labels[i] for i in batch_indices]

        X, y = self.__data_generation(batch_file_paths, batch_labels)
        return X, y

    def on_epoch_end(self):
        if self.shuffle:
            np.random.shuffle(self.indices)

    def __data_generation(self, batch_file_paths, batch_labels):
        X = np.empty((len(batch_file_paths), *self.img_size, 1), dtype=np.float32)
        y = np.empty((len(batch_file_paths),), dtype=int)

        for i, file_path in enumerate(batch_file_paths):
            D_dB = np.load(file_path)
            D_dB = D_dB[..., np.newaxis]  # Add channel dimension
            D_dB = tf.image.resize(D_dB, self.img_size).numpy()
            X[i,] = D_dB
            y[i] = batch_labels[i]

        return X, y

train_generator = OnTheFlyDataGenerator(X_train, y_train, BATCH_SIZE, IMG_SIZE, shuffle=True)
val_generator = OnTheFlyDataGenerator(X_val, y_val, BATCH_SIZE, IMG_SIZE, shuffle=False)


model = Sequential([
        Conv2D(16, (3, 3), activation='relu', input_shape=(IMG_SIZE[0], IMG_SIZE[1], 1)),
        MaxPooling2D((2, 2)),
        Dropout(0.25),
        Conv2D(32, (3, 3), activation='relu'),
        MaxPooling2D((2, 2)),
        Dropout(0.25),
        Flatten(),
        Dense(64, activation='relu'),
        Dropout(0.5),
        Dense(1, activation='sigmoid')
    ])

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

# Set up callbacks
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1, profile_batch='500,520')

# Train the model
history = model.fit(
        train_generator,
        epochs=EPOCHS,
        validation_data=val_generator,
        callbacks=tensorboard_callback
    )

# Clear session
# tf.keras.backend.clear_session()

# Save the best model
model.save('cry_detection_best_model.keras')

print("Training complete. Best model saved as 'cry_detection_best_model.keras'")

Epoch 1/25


  self._warn_if_super_not_called()


[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m16s[0m 175ms/step - accuracy: 0.5016 - loss: 9.9555 - val_accuracy: 0.5270 - val_loss: 0.6930
Epoch 2/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 89ms/step - accuracy: 0.5178 - loss: 0.6928 - val_accuracy: 0.5245 - val_loss: 0.6928
Epoch 3/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 85ms/step - accuracy: 0.5383 - loss: 0.6917 - val_accuracy: 0.5245 - val_loss: 0.6922
Epoch 4/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 74ms/step - accuracy: 0.5271 - loss: 0.6871 - val_accuracy: 0.5270 - val_loss: 0.6927
Epoch 5/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m6s[0m 94ms/step - accuracy: 0.5439 - loss: 0.6733 - val_accuracy: 0.4755 - val_loss: 0.6543
Epoch 6/25
[1m51/51[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 84ms/step - accuracy: 0.6162 - loss: 0.6100 - val_accuracy: 0.4853 - val_loss: 0.6842
Epoch 7/25
[1m51/51[0m [32m━━━━━━━━━━━━━

In [7]:
y_pred = model.predict(val_generator)
y_pred = (y_pred > 0.5).astype(int)
acc = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred)

print(f'Accuracy: {acc}')
print(f'F1 Score: {f1}')

model.save('cry_detection_best_model.keras')

[1m13/13[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 99ms/step
Accuracy: 0.8946078431372549
F1 Score: 0.8948655256723717


In [8]:
# Convert to TensorFlow Lite
converter = tf.lite.TFLiteConverter.from_keras_model(model)
tflite_model = converter.convert()

with open('cry_detection_model.tflite', 'wb') as f:
    f.write(tflite_model)

print('Model converted to TensorFlow Lite and saved.')

Saved artifact at '/tmp/tmpte066mci'. The following endpoints are available:

* Endpoint 'serve'
  args_0 (POSITIONAL_ONLY): TensorSpec(shape=(None, 64, 64, 1), dtype=tf.float32, name='keras_tensor_11')
Output Type:
  TensorSpec(shape=(None, 1), dtype=tf.float32, name=None)
Captures:
  132493970588944: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132495857666352: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132495857666880: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132495857558704: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132493962944480: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132493962946240: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132493962942192: TensorSpec(shape=(), dtype=tf.resource, name=None)
  132493962948528: TensorSpec(shape=(), dtype=tf.resource, name=None)
Model converted to TensorFlow Lite and saved.
