In [1]:
"""
📈 Final Model Accuracies (sorted):
XGBoost: 96.34%
Random Forest: 96.07%
SVM (RBF): 96.07%
MLP: 93.98%
Gradient Boosting: 93.46%
Logistic Regression: 90.05%
k-NN: 83.77%
Naive Bayes: 67.54%
"""

'\n📈 Final Model Accuracies (sorted):\nXGBoost: 96.34%\nRandom Forest: 96.07%\nSVM (RBF): 96.07%\nMLP: 93.98%\nGradient Boosting: 93.46%\nLogistic Regression: 90.05%\nk-NN: 83.77%\nNaive Bayes: 67.54%\n'

In [2]:
import os
import pickle
import numpy as np
import librosa
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
from sklearn.svm import SVC
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.neural_network import MLPClassifier
from xgboost import XGBClassifier
from pydub import AudioSegment
import warnings
import collections
import random

warnings.filterwarnings('ignore')

In [3]:
def save_best_model(trained_models, results, output_dir="."):
    """
    Pickle the highest-accuracy model together with its scaler.

    The winner is written to
    '<output_dir>/infant_cry_detection_classifier_model.pkl' as a
    ``(model, scaler)`` tuple. When several models tie for the best accuracy,
    the one that appears first in `results` is kept.

    Parameters:
    - trained_models: dict of {model_name: (model, scaler)}
    - results: dict of {model_name: accuracy}
    - output_dir: directory to save the model (default: current directory);
      it is created if it does not exist yet

    Returns:
    - saved_path: full path to saved model
    - best_model_name: name of the best model

    Raises:
    - ValueError: if `results` is empty
    - KeyError: if the best-scoring name has no entry in `trained_models`
    """
    if not results:
        # max() on an empty dict raises a ValueError with an unhelpful message.
        raise ValueError("Cannot pick a best model: `results` is empty.")

    best_model_name = max(results, key=results.get)
    best_model, best_scaler = trained_models[best_model_name]

    # open(..., 'wb') does not create parent directories. An empty string
    # means "current directory" to os.path.join, so leave it alone.
    if output_dir:
        os.makedirs(output_dir, exist_ok=True)

    filename = "infant_cry_detection_classifier_model.pkl"
    saved_path = os.path.join(output_dir, filename)

    # NOTE: pickle files execute code on load; only load this file from a
    # trusted location.
    with open(saved_path, 'wb') as f:
        pickle.dump((best_model, best_scaler), f)

    print(f"\n✅ Best model '{best_model_name}' saved as: {saved_path}")
    return saved_path, best_model_name


In [4]:
# -----------------------------
# Audio Processing Utilities
# -----------------------------

def convert_to_wav(file_path):
    """Return a WAV path for `file_path`, transcoding with pydub when needed.

    A path that already ends in '.wav' (case-insensitive) is returned
    unchanged. Any other file is decoded with pydub and exported next to the
    source under the same stem with a '.wav' extension. Every failure is
    printed and reported to the caller as None.
    """
    try:
        already_wav = file_path.lower().endswith('.wav')
        if not already_wav:
            stem, _ext = os.path.splitext(file_path)
            target = stem + '.wav'
            AudioSegment.from_file(file_path).export(target, format='wav')
            return target
        return file_path
    except Exception as err:
        print(f"Error converting {file_path}: {err}")
        return None

In [5]:
def extract_features(audio, sample_rate):
    """Summarise a clip as one flat vector of MFCC statistics.

    Computes 40 MFCCs plus their first- and second-order deltas, then
    concatenates, in this order: the mean, standard deviation and maximum of
    the MFCCs, the mean of the deltas, and the mean of the second-order
    deltas. Returns None (after printing the error) if any step fails.
    """
    try:
        coeffs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        first_order = librosa.feature.delta(coeffs)
        second_order = librosa.feature.delta(coeffs, order=2)

        frames = coeffs.T
        summary = [
            np.mean(frames, axis=0),
            np.std(frames, axis=0),
            np.max(frames, axis=0),
            np.mean(first_order.T, axis=0),
            np.mean(second_order.T, axis=0),
        ]
        return np.hstack(summary)
    except Exception as err:
        print(f"Error extracting features: {err}")
        return None

In [6]:
def augment_audio(audio, sr):
    """Build three randomly perturbed variants of a clip.

    The returned list holds, in order: a pitch-shifted copy (n_steps drawn
    uniformly from [-2, 2]), a time-stretched copy (rate drawn uniformly from
    [0.9, 1.1]), and a copy with Gaussian noise scaled by 0.005 added.
    """
    # The random draws happen in the same order as the list is built.
    shift_steps = random.uniform(-2, 2)
    pitch_shifted = librosa.effects.pitch_shift(audio, sr=sr, n_steps=shift_steps)

    stretch_rate = random.uniform(0.9, 1.1)
    time_stretched = librosa.effects.time_stretch(audio, rate=stretch_rate)

    noise = np.random.randn(len(audio))
    noisy = audio + 0.005 * noise

    return [pitch_shifted, time_stretched, noisy]

In [7]:
# -----------------------------
# Dataset Preparation
# -----------------------------

def _load_clip(file_path):
    """Convert `file_path` to WAV if needed and load it; return (audio, sr) or None."""
    wav_path = convert_to_wav(file_path)
    if not wav_path:
        return None
    try:
        return librosa.load(wav_path, res_type='kaiser_fast')
    except Exception as e:
        # Catch Exception rather than using a bare except, so Ctrl-C still
        # interrupts a long load, and report which file was skipped.
        print(f"Error loading {wav_path}: {e}")
        return None


def balance_dataset_with_augmentation(folder_names, base_path="."):
    """
    Load and balance dataset by augmenting minority classes
    to match the size of the majority class.

    Each entry of `folder_names` is a class folder under `base_path`; its
    index in the list is the integer label. Folders that do not exist are
    skipped. The majority class is the folder with the most audio *files*
    ('.wav', '.mp3', '.m4a', '.ogg'); every other class receives augmented
    samples until it reaches that file count.

    NOTE: augmentation happens here, before any train/test split done by the
    caller, so augmented variants of one recording can end up on both sides
    of a later random split.

    Parameters:
    - folder_names: list of class folder names (list index == label)
    - base_path: directory containing the class folders (default: ".")

    Returns:
    - X: np.ndarray of feature vectors
    - y: np.ndarray of integer labels aligned with X

    Raises:
    - ValueError: if none of the class folders exists under `base_path`
    """
    audio_exts = ('.wav', '.mp3', '.m4a', '.ogg')
    class_files = {}

    for label, folder in enumerate(folder_names):
        path = os.path.join(base_path, folder)
        if not os.path.isdir(path):
            print(f"Skipping missing class folder: {path}")
            continue
        # os.listdir order is arbitrary; sort so runs are repeatable.
        class_files[label] = sorted(
            f for f in os.listdir(path) if f.lower().endswith(audio_exts)
        )

    if not class_files:
        raise ValueError(
            f"No class folders found under {base_path!r}; "
            f"expected at least one of {list(folder_names)}"
        )

    class_counts = {label: len(files) for label, files in class_files.items()}
    max_class = max(class_counts, key=class_counts.get)
    max_samples = class_counts[max_class]
    print(f"\n📌 Max class: {folder_names[max_class]} with {max_samples} samples.")

    X, y = [], []

    for label, files in class_files.items():
        folder_path = os.path.join(base_path, folder_names[label])
        needs_augmentation = label != max_class
        # Decoded clips are kept for minority classes only, so augmentation
        # passes do not have to convert and load every file again.
        clips = []
        current_count = 0

        for file in files:
            clip = _load_clip(os.path.join(folder_path, file))
            if clip is None:
                continue
            if needs_augmentation:
                clips.append(clip)
            features = extract_features(*clip)
            if features is not None:
                X.append(features)
                y.append(label)
                current_count += 1

        if not needs_augmentation:
            continue

        if current_count < max_samples and not clips:
            # Nothing to augment from; looping would never terminate.
            print(f"Warning: no loadable audio for '{folder_names[label]}'; "
                  f"class left with {current_count} samples.")
            continue

        while current_count < max_samples:
            added_this_pass = 0
            for audio, sr in clips:
                for aug_audio in augment_audio(audio, sr):
                    aug_features = extract_features(aug_audio, sr)
                    if aug_features is not None:
                        X.append(aug_features)
                        y.append(label)
                        current_count += 1
                        added_this_pass += 1
                    if current_count >= max_samples:
                        break
                if current_count >= max_samples:
                    break
            if added_this_pass == 0:
                # A full pass produced no usable features; stop instead of
                # spinning forever.
                print(f"Warning: augmentation produced no features for "
                      f"'{folder_names[label]}'; class left with "
                      f"{current_count} samples.")
                break

    return np.array(X), np.array(y)

In [8]:
# -----------------------------
# Model Training
# -----------------------------

def train_model(model, X_train, X_test, y_train, y_test, scaler=None):
    """Fit `model` on the training split and score it on the test split.

    When a scaler is supplied it is fitted on the training features only and
    then applied to both splits. Returns ``(model, accuracy, scaler)``.
    """
    train_inputs, test_inputs = X_train, X_test
    if scaler:
        train_inputs = scaler.fit_transform(X_train)
        test_inputs = scaler.transform(X_test)

    model.fit(train_inputs, y_train)
    predictions = model.predict(test_inputs)
    accuracy = accuracy_score(y_test, predictions)
    return model, accuracy, scaler

In [9]:
# -----------------------------
# Main Pipeline
# -----------------------------

def main():
    """
    Run the end-to-end pipeline: build the dataset, train and score every
    candidate model, save the best one, and return a prediction helper.

    Returns:
    - trained_models: dict of {model_name: (fitted_model, scaler_or_None)}
    - predict_audio: function (file_path, model, scaler=None) that returns
      the predicted class name, or an explanatory string when the file
      cannot be processed
    """
    seed = 42
    # augment_audio draws from both `random` and `np.random`. Without seeding
    # them the augmented dataset, and so every accuracy printed below,
    # changes from one run to the next.
    random.seed(seed)
    np.random.seed(seed)

    folder_names = ['belly_pain', 'burping', 'discomfort', 'hungry', 'tired']
    print("📦 Preparing balanced dataset with augmentation...")
    X, y = balance_dataset_with_augmentation(folder_names)

    # Show class distribution
    label_counts = collections.Counter(y)
    print("\n📊 Class distribution after augmentation:")
    for label, count in sorted(label_counts.items()):
        print(f"{folder_names[label]}: {count}")

    # Split
    # NOTE(review): the dataset is augmented before this split, so augmented
    # copies of one recording can fall in both train and test. The reported
    # accuracies are therefore likely optimistic.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, stratify=y, random_state=seed
    )
    print(f"\n🔹 Train size: {len(X_train)}, Test size: {len(X_test)}")
    print(f"🔹 Feature size: {X.shape[1]}")

    # Models: each entry is (estimator, scaler or None). Models paired with
    # None are trained on the unscaled features.
    models = {
        'Random Forest': (RandomForestClassifier(random_state=seed), None),
        'XGBoost': (XGBClassifier(n_estimators=100, use_label_encoder=False, eval_metric='mlogloss', random_state=seed), None),
        'SVM (RBF)': (SVC(kernel='rbf', random_state=seed), StandardScaler()),
        'Logistic Regression': (LogisticRegression(max_iter=1000, random_state=seed), StandardScaler()),
        'k-NN': (KNeighborsClassifier(n_neighbors=5), StandardScaler()),
        'Gradient Boosting': (GradientBoostingClassifier(random_state=seed), None),
        'Naive Bayes': (GaussianNB(), None),
        'MLP': (MLPClassifier(hidden_layer_sizes=(100,), max_iter=500, random_state=seed), StandardScaler())
    }

    results = {}
    trained_models = {}

    for name, (model, scaler) in models.items():
        print(f"\n🚀 Training {name}...")
        trained_model, acc, trained_scaler = train_model(model, X_train, X_test, y_train, y_test, scaler)
        results[name] = acc
        trained_models[name] = (trained_model, trained_scaler)
        print(f"{name} Accuracy: {acc * 100:.2f}%")

    print("\n📈 Final Model Accuracies (sorted):")
    for name, acc in sorted(results.items(), key=lambda x: x[1], reverse=True):
        print(f"{name}: {acc * 100:.2f}%")

    # Save the best model so it can be reused without retraining every time.
    save_best_model(trained_models, results)

    def predict_audio(file_path, model, scaler=None):
        """Predict the class name for one audio file with a fitted model.

        `scaler` must be the scaler the model was trained with (or None).
        Returns an explanatory string instead of raising when the file
        cannot be converted, loaded, or turned into features.
        """
        wav_path = convert_to_wav(file_path)
        if not wav_path:
            return "Invalid audio"
        try:
            audio, sr = librosa.load(wav_path, res_type='kaiser_fast')
        except Exception as e:
            # Match the other failure paths, which return a message rather
            # than raising.
            print(f"Error loading {wav_path}: {e}")
            return "Invalid audio"
        features = extract_features(audio, sr)
        if features is None:
            return "Could not extract features"
        features = [features]
        # Explicit None check: an object's truthiness says nothing about
        # whether a scaler was supplied.
        if scaler is not None:
            features = scaler.transform(features)
        pred = model.predict(features)[0]
        return folder_names[pred]

    return trained_models, predict_audio


In [10]:
# -----------------------------
# Entry Point
# -----------------------------
# Runs the whole pipeline: dataset preparation, training of every model, and
# saving the best one. `trained_models` maps model name -> (fitted model,
# scaler or None); `predict_func` classifies a single audio file with a
# chosen model/scaler pair.
trained_models, predict_func = main()

📦 Preparing balanced dataset with augmentation...

📌 Max class: hungry with 382 samples.

📊 Class distribution after augmentation:
belly_pain: 382
burping: 382
discomfort: 382
hungry: 382
tired: 382

🔹 Train size: 1528, Test size: 382
🔹 Feature size: 200

🚀 Training Random Forest...
Random Forest Accuracy: 98.17%

🚀 Training XGBoost...
XGBoost Accuracy: 96.86%

🚀 Training SVM (RBF)...
SVM (RBF) Accuracy: 97.91%

🚀 Training Logistic Regression...
Logistic Regression Accuracy: 90.84%

🚀 Training k-NN...


  File "d:\defence\model\donateacry_corpus_cleaned_and_updated_data\.conda\lib\site-packages\joblib\externals\loky\backend\context.py", line 257, in _count_physical_cores
    cpu_info = subprocess.run(
  File "d:\defence\model\donateacry_corpus_cleaned_and_updated_data\.conda\lib\subprocess.py", line 503, in run
    with Popen(*popenargs, **kwargs) as process:
  File "d:\defence\model\donateacry_corpus_cleaned_and_updated_data\.conda\lib\subprocess.py", line 971, in __init__
    self._execute_child(args, executable, preexec_fn, close_fds,
  File "d:\defence\model\donateacry_corpus_cleaned_and_updated_data\.conda\lib\subprocess.py", line 1456, in _execute_child
    hp, ht, pid, tid = _winapi.CreateProcess(executable, args,


k-NN Accuracy: 84.55%

🚀 Training Gradient Boosting...
Gradient Boosting Accuracy: 94.76%

🚀 Training Naive Bayes...
Naive Bayes Accuracy: 67.54%

🚀 Training MLP...
MLP Accuracy: 94.24%

📈 Final Model Accuracies (sorted):
Random Forest: 98.17%
SVM (RBF): 97.91%
XGBoost: 96.86%
Gradient Boosting: 94.76%
MLP: 94.24%
Logistic Regression: 90.84%
k-NN: 84.55%
Naive Bayes: 67.54%

✅ Best model 'Random Forest' saved as: .\infant_cry_detection_classifier_model.pkl


In [12]:
# Raw string: a non-raw literal only works here because "\d" and "\m" are not
# recognized escapes, which Python reports as invalid escape sequences. The
# resulting path is unchanged.
# NOTE(review): this clip appears to come from the 'discomfort' folder of the
# corpus the models were trained on, so agreement below is a smoke test, not
# evidence of generalization.
test_file = r"D:\defence\model\donateacry_corpus_cleaned_and_updated_data\discomfort\1309B82C-F146-46F0-A723-45345AFA6EA8-1432801693-1.1-f-26-dc.wav"
print(f"\n🔍 Prediction Results for: {test_file}")
# Each model is paired with the scaler it was trained with (None when it was
# trained on unscaled features).
for name, (model, scaler) in trained_models.items():
    prediction = predict_func(test_file, model, scaler)
    print(f"{name}: {prediction}")


🔍 Prediction Results for: D:\defence\model\donateacry_corpus_cleaned_and_updated_data\discomfort\1309B82C-F146-46F0-A723-45345AFA6EA8-1432801693-1.1-f-26-dc.wav
Random Forest: discomfort
XGBoost: discomfort
SVM (RBF): discomfort
Logistic Regression: discomfort
k-NN: discomfort
Gradient Boosting: discomfort
Naive Bayes: discomfort
MLP: discomfort
