In [1]:
!pip install librosa scikit-learn matplotlib seaborn gradio




In [1]:
# Mount Google Drive to access dataset
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [2]:
import os
import zipfile

# Assuming the zip is uploaded to your Drive
zip_path = '/content/drive/MyDrive/baby_cry_data.zip'
extract_path = '/content/drive/MyDrive/baby_cry_data'

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

In [3]:
#checking class distribution
import os
from collections import Counter

base_dir = '/content/drive/MyDrive/baby_cry_data'
classes = os.listdir(base_dir)

class_counts = {}
for cls in classes:
    if not cls.startswith('.'):
        files = os.listdir(os.path.join(base_dir, cls))
        class_counts[cls] = len(files)

print(class_counts)

{'BabyCryingSounds': 9}


In [4]:
#convert audio to mel spectogram
import numpy as np
import librosa
import librosa.display
import matplotlib.pyplot as plt

def audio_to_melspectrogram(file_path, max_len=128):
    y, sr = librosa.load(file_path, sr=22050)
    mels = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
    log_mels = librosa.power_to_db(mels, ref=np.max)

    if log_mels.shape[1] < max_len:
        pad_width = max_len - log_mels.shape[1]
        log_mels = np.pad(log_mels, pad_width=((0, 0), (0, pad_width)), mode='constant')
    else:
        log_mels = log_mels[:, :max_len]

    return log_mels

In [5]:
import tqdm

X = []
y = []
label_map = {label: idx for idx, label in enumerate(sorted(class_counts.keys()))}

audio_extensions = ('*.wav', '*.ogg')
for label in label_map:
    folder = os.path.join(base_dir, label)
    for file in tqdm.tqdm(base_dir, desc=f"Processing {label}"):
        file_path = os.path.join(folder, file)
        if not file_path.lower().endswith(audio_extensions):  # Skip non-audio files
            continue
        try:
            mel = audio_to_melspectrogram(file_path)
            X.append(mel)
            y.append(label_map[label])
        except Exception as e:
            print(f"Error processing {file_path}: {e}")





Processing BabyCryingSounds: 100%|██████████| 36/36 [00:00<00:00, 60422.15it/s]


In [6]:
import os
import glob
from collections import defaultdict

# Path to the main dataset folder on Google Drive
base_dir = "/content/drive/MyDrive/baby_cry_data/BabyCryingSounds"

# Expected classes
labels = ['belly pain', 'burping', 'cold_hot', 'discomfort',
                   'hungry', 'laugh', 'noise', 'silence', 'tired']

# Mapping: label name → list of audio file paths
data_by_class = defaultdict(list)

# Build class-to-files mapping
audio_extensions = ('*.wav', '*.ogg')

for label in labels:
    folder_path = os.path.join(base_dir, label)
    if os.path.exists(folder_path):
        audio_files = []
        for ext in audio_extensions:
            audio_files.extend(glob.glob(os.path.join(folder_path, ext)))
        data_by_class[label].extend(audio_files)
        print(f"✅ Loaded {len(audio_files)} files for class: {label}")
    else:
        print(f"⚠️ Folder not found for class: {label}")


# Summary of data distribution
print("\n📊 Class distribution:")
for label, files in data_by_class.items():
    print(f"{label:12s} → {len(files)} files")



✅ Loaded 124 files for class: belly pain
✅ Loaded 108 files for class: burping
✅ Loaded 108 files for class: cold_hot
✅ Loaded 135 files for class: discomfort
✅ Loaded 382 files for class: hungry
✅ Loaded 108 files for class: laugh
✅ Loaded 108 files for class: noise
✅ Loaded 108 files for class: silence
✅ Loaded 132 files for class: tired

📊 Class distribution:
belly pain   → 124 files
burping      → 108 files
cold_hot     → 108 files
discomfort   → 135 files
hungry       → 382 files
laugh        → 108 files
noise        → 108 files
silence      → 108 files
tired        → 132 files


Convert audio to melspec

In [8]:
# Step 2: Convert Audio to Mel Spectrogram

import librosa
import numpy as np

IMG_HEIGHT = 128
IMG_WIDTH = 128

def audio_to_mel(file_path):
    y, sr = librosa.load(file_path, sr=22050)
    mels = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=IMG_HEIGHT)
    db = librosa.power_to_db(mels, ref=np.max)

    if db.shape[1] < IMG_WIDTH:
        pad = IMG_WIDTH - db.shape[1]
        db = np.pad(db, ((0, 0), (0, pad)), mode='constant')
    else:
        db = db[:, :IMG_WIDTH]

    return db

In [10]:
# Step 3: Build Dataset
X = []
y = []

# Use data_by_class from the previous cell
for label, file_paths in data_by_class.items():
    for path in file_paths:
        try:
            mel = audio_to_mel(path)
            X.append(mel)
            y.append(label)  # Append the label string

        except Exception as e:
            print(f"Error: {path} - {e}")

X = np.array(X)
X = X[..., np.newaxis]

In [14]:
#step4
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Convert labels to numpy array
y = np.array(y)

# Encode string labels into integers
le = LabelEncoder()
y_encoded = le.fit_transform(y)  # e.g., 'hungry' → 4

# For model training
y_cat = to_categorical(y_encoded)

# For saving to folders later
y_labels = le.inverse_transform(y_encoded)  # back to original labels

#step 5
from sklearn.model_selection import train_test_split

# First split: Train + Test (80/20)
X_train_val, X_test, y_train_val, y_test = train_test_split(
    X, y_labels, test_size=0.2, random_state=42, stratify=y_labels
)

# Second split: Train + Val (80/20 of the remaining → 64/16 overall)
X_train, X_val, y_train, y_val = train_test_split(
    X_train_val, y_train_val, test_size=0.2, random_state=42, stratify=y_train_val
)


In [15]:
import os

output_base = '/content/baby_cry_split'

# Create directories for train, val, test
for split in ['train', 'val', 'test']:
    for class_name in le.classes_:
        os.makedirs(os.path.join(output_base, split, class_name), exist_ok=True)

from tqdm import tqdm

def save_split(X_split, y_split, split_name):
    for idx, (mel, label) in enumerate(tqdm(zip(X_split, y_split), total=len(X_split), desc=f"Saving {split_name}")):
        save_path = os.path.join(output_base, split_name, label, f"mel_{idx}.npy")
        np.save(save_path, mel)

# Save all splits
save_split(X_train, y_train, 'train')
save_split(X_val, y_val, 'val')
save_split(X_test, y_test, 'test')


Saving train: 100%|██████████| 840/840 [00:00<00:00, 2917.52it/s]
Saving val: 100%|██████████| 210/210 [00:00<00:00, 2380.02it/s]
Saving test: 100%|██████████| 263/263 [00:00<00:00, 3075.19it/s]
