# SafeNSound: Initial Log Scaled Mel-Spectrogram Development

## 1. Importing Libraries

In [None]:
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import librosa as lb
import librosa.display as ld
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers, models
import sounddevice as sd
from sklearn.model_selection import train_test_split

## 2. Datasets

In [2]:
PATH = "datasets/placeholder/"
EMERGENCY_PATH = os.path.join(PATH, "emergency/")
NORMAL_PATH = os.path.join(PATH, "normal/")

def load_audio_files(directory, filename):
    if not os.path.exists(directory):
        raise FileNotFoundError(f"Directory {directory} does not exist.")

    if filename.endswith('.wav'):
        file_path = os.path.join(directory, filename)
        try:
            wav, sr = lb.load(file_path, sr=16000, mono=True)
            ##wav = tf.convert_to_tensor(wav, dtype=tf.float32)
        except Exception as e:
            print(f"Error loading {file_path}: {e}")
    return wav, sr

#get audio real-time
## def record_audio(duration=5, sample_rate=16000):

## 3. Creating Mel Spectrogram & converting to Log Scale

In [14]:
def log_scaled_melspectrogram(audio, sample_rate, n_fft=1024, hop_length=512, n_mels=128, max_len=160):
    S = lb.feature.melspectrogram(y=audio, sr=sample_rate, n_fft=n_fft, hop_length=hop_length, n_mels=n_mels)

    if S.shape[1] < max_len:
        S = np.pad(S, ((0,0),(0, max_len - S.shape[1])), mode='constant')
    else:
        S = S[:, :max_len]

    S_dB = lb.power_to_db(S, ref=np.max)
    
    S_dB = S_dB[..., np.newaxis]
    
    return S_dB

## 4. Display Log-scaled Mel Spectrogram

In [15]:
X = []
y = []
labels = {"emergency": 1, "normal": 0}

for label, idx in labels.items():
    folder = f"{PATH}{label}"
    for file in os.listdir(folder):
        if file.endswith(".wav"):
            wav, sr = load_audio_files(folder, file)
            S_dB = log_scaled_melspectrogram(wav, sr)

            # try:
            #     plt.figure(figsize=(10, 4))
            #     lb.display.specshow(S_dB, sr=sr, hop_length=512, x_axis='time', y_axis='mel')
            #     plt.colorbar(format="%+2.0f dB")
            #     plt.title("Log-scaled Mel Spectrogram of " + file)
            #     plt.tight_layout()
            #     plt.show()
            # except Exception as e:
            #     print(f"Error displaying MFCC for {file}: {e}")
            #     continue

            X.append(S_dB)
            y.append(idx)

X = np.array(X)
y = np.array(y)

## 5. Split

In [20]:
X_train, X_temp, y_train, y_temp = train_test_split(X, y, test_size=0.3, random_state=42, stratify=y)

X_val, X_test, y_val, y_test = train_test_split(X_temp, y_temp, test_size=0.5, random_state=42, stratify=y_temp)

## 6. CNN & Train

In [21]:
def build_cnn(input_shape, num_classes=2):
    model = models.Sequential([
        # Conv Block 1
        layers.Conv2D(32, (3,3), activation='relu', padding='same', input_shape=input_shape),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2,2)),
        layers.Dropout(0.3),

        # Conv Block 2
        layers.Conv2D(64, (3,3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2,2)),
        layers.Dropout(0.3),

        # Conv Block 3
        layers.Conv2D(128, (3,3), activation='relu', padding='same'),
        layers.BatchNormalization(),
        layers.MaxPooling2D((2,2)),
        layers.Dropout(0.3),

        # Flatten + Dense
        layers.Flatten(),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.4),
        layers.Dense(num_classes, activation='softmax')
    ])

    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    
    return model

In [22]:
input_shape = (X.shape[1], X.shape[2], X.shape[3])

model = build_cnn(input_shape, num_classes=2)

history = model.fit(X_train, y_train, epochs=20, batch_size=32, validation_data=(X_val, y_val))

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


Epoch 1/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 5s/step - accuracy: 0.3846 - loss: 2.6720 - val_accuracy: 0.3333 - val_loss: 27.5583
Epoch 2/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 345ms/step - accuracy: 0.9231 - loss: 0.1063 - val_accuracy: 0.3333 - val_loss: 25.4252
Epoch 3/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 336ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 0.3333 - val_loss: 23.8327
Epoch 4/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 391ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 0.3333 - val_loss: 21.8646
Epoch 5/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 335ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 0.3333 - val_loss: 19.8160
Epoch 6/20
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 360ms/step - accuracy: 1.0000 - loss: 0.0000e+00 - val_accuracy: 0.3333 - val_loss: 17.9131
Epoch 7/20
[1m1/1[0

In [23]:
test_loss, test_acc = model.evaluate(X_test, y_test)
print("Test Accuracy:", test_acc)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 74ms/step - accuracy: 0.6667 - loss: 0.6819
Test Accuracy: 0.6666666865348816
