In [None]:
import os
import numpy as np
import librosa
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score, f1_score


In [None]:
# Root directory of the audio corpus, given relative to the notebook's working directory.
AUDIO_PATH = 'CryCorpusFinal'
# Folder scanned for the clips that are labelled 1 ("cry") further down.
CRY_FOLDER = os.path.join(AUDIO_PATH, 'cry/augmented')
# Folder scanned for the clips that are labelled 0 ("not cry") further down.
NOTCRY_FOLDER = os.path.join(AUDIO_PATH, 'notcry')
# NOTE(review): IMG_SIZE is not referenced by any other visible cell.
IMG_SIZE = (128, 128)  # Not used for these models but may be relevant for spectrogram size


In [None]:
def load_audio_files(folder):
    """Return the paths of the ``.wav`` files located directly inside ``folder``.

    The result is sorted by filename. ``os.listdir`` yields entries in
    arbitrary order, so without sorting any caller that numbers files by
    their list position would get a different file-to-index mapping (and a
    different sample order) from one run or machine to the next.

    Args:
        folder: Directory to scan. Sub-directories are not searched.

    Returns:
        list[str]: ``folder`` joined with every filename ending in ``.wav``
        (case-sensitive match), in ascending filename order.

    Raises:
        OSError: Propagated from ``os.listdir`` when ``folder`` cannot be read.
    """
    return [
        os.path.join(folder, filename)
        for filename in sorted(os.listdir(folder))
        if filename.endswith('.wav')
    ]

def compute_spectrogram(y, sr, n_fft=2048, hop_length=512):
    """Compute a dB-scaled magnitude spectrogram of an audio signal.

    Args:
        y: Audio samples, passed straight to ``librosa.stft``.
        sr: Sampling rate. Accepted for interface symmetry with
            ``librosa.load`` but not used by this function.
        n_fft: FFT window size forwarded to ``librosa.stft``.
        hop_length: Hop length forwarded to ``librosa.stft``.

    Returns:
        The result of ``librosa.amplitude_to_db`` applied to the absolute
        value of the STFT, using ``np.max`` as the reference.
    """
    magnitude = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))
    return librosa.amplitude_to_db(magnitude, ref=np.max)

def save_spectrogram_to_disk(D_dB, save_path):
    """Write a spectrogram array to ``save_path`` using ``np.save``.

    Any missing parent directories of ``save_path`` are created first.

    Args:
        D_dB: Array to store.
        save_path: Destination file path. May be a bare filename, in which
            case the file is written to the current working directory.

    Raises:
        OSError: If the parent directory cannot be created or the file
            cannot be written.
    """
    parent_dir = os.path.dirname(save_path)
    # dirname() is '' for a bare filename; os.makedirs('') would raise even
    # though the current working directory obviously exists, so skip it.
    if parent_dir:
        # exist_ok=True removes the check-then-create race of a separate
        # os.path.exists() test.
        os.makedirs(parent_dir, exist_ok=True)
    np.save(save_path, D_dB)

cry_files = load_audio_files(CRY_FOLDER)
notcry_files = load_audio_files(NOTCRY_FOLDER)

# Parallel lists: data[i] is the path of a saved spectrogram and labels[i]
# is its class (1 = cry, 0 = not cry).
data = []
labels = []

# Both classes go through exactly the same steps and differ only in the
# filename prefix and the label, so a single loop handles them. All cry
# clips are processed first, then all not-cry clips.
for prefix, label, class_files in (('cry', 1, cry_files), ('notcry', 0, notcry_files)):
    for idx, file_path in enumerate(class_files):
        # sr=None is forwarded to librosa.load, so no target rate is imposed.
        # NOTE(review): clips that differ in length produce spectrograms of
        # different shapes, which cannot be stacked into one feature matrix
        # later on - confirm the corpus clips are uniform.
        y, sr = librosa.load(file_path, sr=None)
        spectrogram = compute_spectrogram(y, sr)
        save_path = f'spectrograms/{prefix}_{idx}.npy'
        save_spectrogram_to_disk(spectrogram, save_path)
        data.append(save_path)
        labels.append(label)


In [None]:
def load_spectrogram_data(file_paths):
    """Load saved spectrograms and stack them into a 2-D feature matrix.

    Each file is read with ``np.load`` and flattened to one dimension, so
    every spectrogram becomes one row of the result.

    Args:
        file_paths: Iterable of paths to ``.npy`` files.

    Returns:
        np.ndarray: Array of shape ``(n_files, n_values)``. For an empty
        ``file_paths`` an empty 1-D array is returned.

    Raises:
        ValueError: If a spectrogram flattens to a different number of
            values than the first one. Without this check the mismatch
            surfaces as a ragged-array error (or an object array, depending
            on the NumPy version) that does not say which file is at fault.
    """
    features = []
    expected_size = None
    for file_path in file_paths:
        flat = np.load(file_path).flatten()
        if expected_size is None:
            # The first file defines the row width every other file must match.
            expected_size = flat.size
        elif flat.size != expected_size:
            raise ValueError(
                f'Spectrogram {file_path!r} flattens to {flat.size} values, '
                f'expected {expected_size}; all spectrograms must have the same size.'
            )
        features.append(flat)
    return np.array(features)

# Assemble the model inputs: the class labels as an array, and the feature
# matrix built from the saved spectrogram files (one flattened file per row).
y_data = np.array(labels)
X_data = load_spectrogram_data(data)


In [None]:
# Hold out 20% of the samples for validation with a fixed seed.
# stratify=y_data keeps the cry / not-cry proportion the same in both splits;
# without it the class balance of the validation set is left to chance, which
# distorts accuracy and F1 when the two classes differ in size.
X_train, X_val, y_train, y_val = train_test_split(
    X_data, y_data, test_size=0.2, random_state=42, stratify=y_data
)


In [None]:
def _fit_and_score(model, X_fit, y_fit, X_eval, y_eval):
    """Fit ``model`` and score its predictions on a held-out set.

    Args:
        model: Estimator exposing ``fit`` and ``predict``.
        X_fit, y_fit: Features and labels used for fitting.
        X_eval, y_eval: Features and labels used for scoring.

    Returns:
        tuple: ``(predictions, accuracy, f1)`` for the evaluation set.
    """
    model.fit(X_fit, y_fit)
    predictions = model.predict(X_eval)
    return predictions, accuracy_score(y_eval, predictions), f1_score(y_eval, predictions)


# Logistic Regression
log_reg_model = LogisticRegression(max_iter=1000)
y_pred_log_reg, acc_log_reg, f1_log_reg = _fit_and_score(
    log_reg_model, X_train, y_train, X_val, y_val
)
print(f'Logistic Regression - Accuracy: {acc_log_reg}, F1 Score: {f1_log_reg}')

# Random Forest
# random_state is fixed so the reported metrics and the persisted model are
# reproducible; an unseeded forest gives different results on every run.
rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
y_pred_rf, acc_rf, f1_rf = _fit_and_score(rf_model, X_train, y_train, X_val, y_val)
print(f'Random Forest - Accuracy: {acc_rf}, F1 Score: {f1_rf}')

# Naive Bayes
nb_model = GaussianNB()
y_pred_nb, acc_nb, f1_nb = _fit_and_score(nb_model, X_train, y_train, X_val, y_val)
print(f'Naive Bayes - Accuracy: {acc_nb}, F1 Score: {f1_nb}')


In [None]:
import joblib

# Persist every fitted classifier with joblib, one file per model, written
# relative to the current working directory.
trained_models = {
    'logistic_regression_model.pkl': log_reg_model,
    'random_forest_model.pkl': rf_model,
    'naive_bayes_model.pkl': nb_model,
}
for model_filename, fitted_model in trained_models.items():
    joblib.dump(fitted_model, model_filename)
