# **Deciphering Infant Distress:** *Unveiling the Hidden Patterns Behind Baby Cries Using Machine Learning Techniques*

## **Part 1:** Let's train a ML model and test it's performance

We will begin by collecting all of the sample audio files we have, splitting them into smaller audio snippets and training a collection of machine learning algorithms with one part of this data.  With another part of the data we will test and see how well the algorithms predict data they have never seen before and then choose the best algorithm for our project

### **Step 1:**  Grab the audio file and its label (we have 9 labels: belly_pain, cold_hot, discomfort, hungry, lonely, need_to_burp, scared, tired, unknown)

In [1]:
import os

# Store all audio files in a dictionary where key: filename, value: label
raw_audio = {}

# Define directories for each label
directories = {
    'belly_pain_folder': 'bp',
    'cold_hot_folder': 'ch',
    'discomfort_folder': 'dc',
    'hungry_folder': 'hu',
    'lonely_folder': 'lo',
    'need_to_burp_folder': 'bu',
    'scared_folder': 'sc',
    'tired_folder': 'ti',
    'unknown_folder': 'un',
}

# Path to the Training_Dataset folder
dataset_folder = "dataset"

# Iterate through each directory
for directory, label in directories.items():
    # Path to the current label folder
    label_directory = os.path.join(dataset_folder, directory)
    # Iterate through files in the directory
    for filename in os.listdir(label_directory):
        if filename.endswith(".wav"):
            # Add file path and label to the dictionary
            raw_audio[os.path.join(label_directory, filename)] = label

# Print the dictionary containing file paths and labels
# print(raw_audio)

### **Step 2:**  Split the audio files into 1 second snippets and save them in corresponding folders of their label names

In [2]:
import wave 
import math
import os

def split_audio(filename, folder):
    # Open the audio file
    handle = wave.open(filename, 'rb')
    frame_rate = handle.getframerate() # Frame rate
    n_frames = handle.getnframes() # Total number of frames
    window_size = frame_rate  # Set window size with no. of frames generated in 1 second
    num_secs = int(math.ceil(n_frames / frame_rate)) # Total number of seconds in the audio file
    
    # Create the output directory if it doesn't exist
    output_dir = f'splitted_audio/{folder}'
    if not os.path.exists(output_dir):
        os.makedirs(output_dir)
    
    # Slicing Audio file
    for i in range(num_secs):
        # Generate snippet filename
        shortfilename = os.path.basename(filename).split(".")[0]
        snippetfilename = f'{output_dir}/{shortfilename}_snippet{i+1}.wav'
        # print(snippetfilename)
        
        # Open snippet file for writing
        with wave.open(snippetfilename, 'wb') as snippet:
            snippet.setnchannels(handle.getnchannels()) # number of independent audio signals within the audio file
            # print(handle.getnchannels())
            snippet.setsampwidth(handle.getsampwidth()) # number of bytes used to represent each audio sample
            # print(handle.getsampwidth())
            snippet.setframerate(frame_rate)
            
            # Write frames for 1-second snippet
            snippet.writeframes(handle.readframes(window_size))
            
    handle.close()

# Iterate through each audio file in raw_audio dictionary
for audio_file, label in raw_audio.items():
    split_audio(audio_file, label)

### **Step 3:**  Transform .wav files to frequency spectrum "fingerprints" using MFCC algorithm

In [3]:
import pandas as pd
import librosa 
import numpy as np
'''Split and Transform each track'''
X = pd.DataFrame(columns = np.arange(45), dtype = 'float32').astype(np.float32)
j = 0
k = 0
l = 0
m = 0
n = 0
o = 0
p = 0
q = 0

for i, filename in enumerate(os.listdir('splitted_audio/bp/')):
    last_number_frames = -1
    if filename.endswith(".wav"):
        #print filename
        audiofile, sr = librosa.load("splitted_audio/bp/" + filename)
        fingerprint = librosa.feature.mfcc(y=audiofile, sr=sr, n_mfcc=1)
        x = pd.DataFrame(fingerprint, dtype = 'float32')
        x[44] = 'bp'
        X.loc[i] = x.loc[0]
        j = i
        
for i, filename in enumerate(os.listdir('splitted_audio/bu/')):
    if filename.endswith(".wav"):
        #print filename
        audiofile, sr = librosa.load("splitted_audio/bu/" + filename)
        fingerprint = librosa.feature.mfcc(y=audiofile, sr=sr, n_mfcc=1)
        x = pd.DataFrame(fingerprint, dtype = 'float32')
        x[44] = 'bu'
        X.loc[i+j] = x.loc[0] 
        k = i 
        
for i, filename in enumerate(os.listdir('splitted_audio/ch/')):
    if filename.endswith(".wav"):
        #print filename
        audiofile, sr = librosa.load("splitted_audio/ch/" + filename)
        fingerprint = librosa.feature.mfcc(y=audiofile, sr=sr, n_mfcc=1)
        x = pd.DataFrame(fingerprint, dtype = 'float32')
        x[44] = 'ch'
        X.loc[i+j+k] = x.loc[0]
        l = i
        
for i, filename in enumerate(os.listdir('splitted_audio/dc/')):
    if filename.endswith(".wav"):
        #print filename
        audiofile, sr = librosa.load("splitted_audio/dc/" + filename)
        fingerprint = librosa.feature.mfcc(y=audiofile, sr=sr, n_mfcc=1)
        x = pd.DataFrame(fingerprint, dtype = 'float32')
        x[44] = 'dc'
        X.loc[i+j+k+l] = x.loc[0]
        m = i
        
for i, filename in enumerate(os.listdir('splitted_audio/hu/')):
    if filename.endswith(".wav"):
        #print filename
        audiofile, sr = librosa.load("splitted_audio/hu/" + filename)
        fingerprint = librosa.feature.mfcc(y=audiofile, sr=sr, n_mfcc=1)
        x = pd.DataFrame(fingerprint, dtype = 'float32')
        x[44] = 'hu'
        X.loc[i+j+k+l+m] = x.loc[0]
        n = i
        
for i, filename in enumerate(os.listdir('splitted_audio/lo/')):
    if filename.endswith(".wav"):
        #print filename
        audiofile, sr = librosa.load("splitted_audio/lo/" + filename)
        fingerprint = librosa.feature.mfcc(y=audiofile, sr=sr, n_mfcc=1)
        x = pd.DataFrame(fingerprint, dtype = 'float32')
        x[44] = 'lo'
        X.loc[i+j+k+l+m+n] = x.loc[0]
        o = i
        
for i, filename in enumerate(os.listdir('splitted_audio/sc/')):
    if filename.endswith(".wav"):
        #print filename
        audiofile, sr = librosa.load("splitted_audio/sc/" + filename)
        fingerprint = librosa.feature.mfcc(y=audiofile, sr=sr, n_mfcc=1)
        x = pd.DataFrame(fingerprint, dtype = 'float32')
        x[44] = 'sc'
        X.loc[i+j+k+l+m+n+o] = x.loc[0]
        p = i
        
for i, filename in enumerate(os.listdir('splitted_audio/ti/')):
    if filename.endswith(".wav"):
        #print filename
        audiofile, sr = librosa.load("splitted_audio/ti/" + filename)
        fingerprint = librosa.feature.mfcc(y=audiofile, sr=sr, n_mfcc=1)
        x = pd.DataFrame(fingerprint, dtype = 'float32')
        x[44] = 'ti'
        X.loc[i+j+k+l+m+n+o+p] = x.loc[0]
        q = i
        
for i, filename in enumerate(os.listdir('splitted_audio/un/')):
    if filename.endswith(".wav"):
        #print filename
        audiofile, sr = librosa.load("splitted_audio/un/" + filename)
        fingerprint = librosa.feature.mfcc(y=audiofile, sr=sr, n_mfcc=1)
        x = pd.DataFrame(fingerprint, dtype = 'float32')
        x[44] = 'un'
        X.loc[i+j+k+l+m+n+o+p+q] = x.loc[0]
        
        
#Do something with missing values. you might want to do something more sophisticated with missing values later
X = X.fillna(0)

  X = X.fillna(0)


In [4]:
# With each row representing an audio sample and each column representing a feature. (last column represents label)
X.head(100)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,35,36,37,38,39,40,41,42,43,44
0,-886.068054,-886.068054,-886.068054,-886.068054,-886.068054,-886.068054,-886.068054,-886.068054,-886.068054,-886.068054,...,-658.678040,-639.897400,-588.136902,-586.255798,-610.009155,-610.174255,-635.943604,-703.638977,-732.864746,bp
1,-751.045105,-729.236511,-708.148621,-693.773499,-670.862305,-624.201660,-610.616272,-610.707886,-620.584717,-650.334167,...,-785.789490,-775.371216,-767.533813,-774.619324,-776.557129,-774.774963,-771.559326,-772.556458,-794.632568,bp
2,-772.133057,-744.162903,-741.084167,-741.149719,-732.525146,-725.661926,-725.300781,-726.574707,-738.909912,-756.122864,...,-738.214294,-714.469360,-688.410828,-644.362305,-595.480042,-571.091003,-550.012207,-497.703308,-478.067719,bp
3,-486.896667,-504.154694,-528.663208,-498.589417,-473.853363,-465.715942,-491.972198,-515.916199,-522.104370,-507.522522,...,-513.860657,-504.099640,-507.292999,-541.001099,-554.377930,-553.175903,-563.970520,-528.158081,-509.625885,bp
4,-489.437866,-469.426788,-466.268738,-480.051727,-509.546631,-514.609497,-500.760651,-492.276031,-497.030273,-489.642792,...,-353.614197,-355.154236,-356.105713,-360.017853,-348.864044,-338.575348,-336.447205,-293.900299,-282.138824,bp
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
95,-324.295624,-377.439545,-505.412476,-491.936646,-481.928009,-483.608429,-474.369904,-472.105255,-460.829437,-456.104889,...,-515.246582,-515.018005,-519.019836,-522.249634,-519.693848,-520.944214,-529.458252,-534.539734,-550.338623,un
96,-530.313660,-511.796326,-513.623108,-515.572998,-513.042847,-509.931915,-502.527100,-446.869476,-398.313110,-390.141510,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,un
97,-806.703186,-806.703186,-806.703186,-806.703186,-806.703186,-806.703186,-806.703186,-806.703186,-806.703186,-806.703186,...,-423.700836,-404.463776,-384.130310,-369.196014,-352.536285,-347.390503,-363.463074,-332.913757,-322.312775,un
98,-330.191193,-326.846954,-355.417877,-349.174164,-349.915253,-351.735413,-345.113464,-352.565277,-368.286285,-389.176483,...,-440.209442,-453.926819,-460.867767,-468.497070,-441.149658,-422.915894,-424.338867,-401.523956,-396.849396,un


### **Step 4:**  Make a Test-Train-Split of the data

In [5]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

y = X[44]
del X[44]
X = X.astype(np.float32)

# 75% of the data is allocated to the training set (X_train and y_train) and 25% to the testing set (X_test and y_test)
X_train, X_test, y_train, y_test = train_test_split(X, y)


### **Step 5:**  Fit the training data to a model & Check the models performance against the test data¶

In [6]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.svm import SVC
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import accuracy_score, classification_report
from sklearn.metrics import confusion_matrix, precision_score, recall_score

def get_scores(classifier, X_train, X_test, y_train, y_test, **kwargs):
    model = classifier(**kwargs)
    model.fit(X_train, y_train)
    y_predict = model.predict(X_test)
    accuracy = model.score(X_test, y_test)
    precision = precision_score(y_test, y_predict, average='weighted', labels=np.unique(y_predict))
    return accuracy, precision


print("    Model: (Accuracy, Precision)")
print("    Random Forest:", get_scores(RandomForestClassifier, X_train, X_test, y_train, y_test, n_estimators=25, max_features=5))
print("    Logistic Regression:", get_scores(LogisticRegression, X_train, X_test, y_train, y_test))
print("    Decision Tree:", get_scores(DecisionTreeClassifier, X_train, X_test, y_train, y_test))
print("    SVM:", get_scores(SVC, X_train, X_test, y_train, y_test))


    Model: (Accuracy, Precision)
    Random Forest: (0.5943600867678959, 0.5568291505791505)
    Logistic Regression: (0.596529284164859, 0.5481645725548164)


STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


    Decision Tree: (0.5292841648590022, 0.5169328704311609)
    SVM: (0.6160520607375272, 0.7560899715504978)


### **Step 6:** After we are satisfied with the results of our model, we save the model into a .pkl file that we can quickly use to make predictions of new data.  I will fit a new SVM model that uses all of the data I have and save it as 'mySVM.pkl'

In [7]:
import os
import pickle
from sklearn.svm import SVC

def pickle_model(model, modelname):
    # Create the "models" directory if it doesn't exist
    if not os.path.exists('models'):
        os.makedirs('models')
    
    # Save the model to a .pkl file
    with open(f'models/{modelname}.pkl', 'wb') as f:
        pickle.dump(model, f)

# Assuming you have already trained your SVC model and have X, y data
# Initialize and train your SVM model
model_svm = SVC()
model_svm.fit(X, y)

# Save the trained model
pickle_model(model_svm, "mySVM")


## **Part 2**
## Let's see if it works! Making Actual Predictions on new sounds

### **Step 1:** Load the model from disk into Python

In [8]:
import pickle

def getModel(pickle_path):
    with open(pickle_path, 'rb') as f:
        return pickle.load(f)

model = getModel("models/mySVM.pkl")

### **Step 2:**  Split the .wav file and store it in a folder 

In [12]:
import soundfile as sf
import os
import math

def chop_new_audio(filename, folder):
    try:
        # Open the input audio file using soundfile
        audio, samplerate = sf.read(filename)
        
        # Calculate window size for 1 second
        window_size = samplerate
        
        # Calculate number of snippets
        num_snippets = len(audio) // window_size
        
        # Create the output directory if it doesn't exist
        if not os.path.exists(folder):
            os.makedirs(folder)
        
        # Slicing audio file into 1-second snippets
        for i in range(num_snippets):
            snippetfilename = f'{folder}/{os.path.basename(filename)}_snippet{i+1}.wav'
            
            # Calculate start and end index for the snippet
            start_idx = i * window_size
            end_idx = (i + 1) * window_size
            
            # Write the snippet to a new wave file
            sf.write(snippetfilename, audio[start_idx:end_idx], samplerate)
        
        print("Audio file splitted successfully.")
        
    except Exception as e:
        print(f"Error occurred while splitting audio: {e}")

# Example usage
chop_new_audio("babycryingformilk.wav", "babycryingformilk_snippets")


Audio file splitted successfully.


### **Step 3:**  Transform the splitted snippets into MFCC fingerprints and make a prediction

In [10]:
import os

predictions = []
# Correct the directory path to 'babycryingformilk_snippets/'
for i, filename in enumerate(os.listdir('babycryingformilk_snippets/')):
    last_number_frames = -1
    if filename.endswith(".wav"):
        audiofile, sr = librosa.load("babycryingformilk_snippets/"+filename)
        fingerprint = librosa.feature.mfcc(y=audiofile, sr=sr, n_mfcc=1)
        x = pd.DataFrame(fingerprint, dtype='float32')
        prediction = model.predict(x)  # Pass features 'x' to the predict method
        predictions.append(prediction[0])


### **Step 4:** Take the mode of the predictions to come up with a final predition

In [11]:
from collections import Counter

data = Counter(predictions)
print(data.most_common())   # Returns all unique items and their counts
print(data.most_common(1))

if data.most_common(1)[0][0] == 'bp':
    print("The baby is crying because he/she is likely to be feeling belly pain.")
if data.most_common(1)[0][0] == 'ch':
    print("The baby is crying because he/she is likely to be feeling cold or hot.")
if data.most_common(1)[0][0] == 'dc':
    print("The baby is crying because he/she is likely to be feeling discomfort.")
if data.most_common(1)[0][0] == 'hu':
    print("The baby is crying because he/she is likely to be hungry.")
if data.most_common(1)[0][0] == 'lo':
    print("The baby is crying because he/she is likely to be feeling lonely.")
if data.most_common(1)[0][0] == 'bu':
    print("The baby is crying because he/she likely to be needs to burp.")
if data.most_common(1)[0][0] == 'sc':
    print("The baby is crying because he/she is likely to be scared.")
if data.most_common(1)[0][0] == 'ti':
    print("The baby is crying because he/she is likely to be tired.")
if data.most_common(1)[0][0] == 'un':
    print("The reason why the baby is crying is unknown.")


[('hu', 29)]
[('hu', 29)]
The baby is crying because he/she is likely to be hungry.
