In [1]:
import numpy as np
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
import librosa
from IPython.display import Audio
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
import tensorflow as tf

In [2]:
train_folder_path = r'C:./dataset/audioonly/labeled/set 1/Train'
test_folder_path = r'C:./dataset/audioonly/labeled/set 1/Test'

In [3]:
class_names = next(os.walk(train_folder_path))[1]
class_names = class_names
class_names

['belly_pain', 'discomfort', 'hungry', 'tired']

In [4]:
audio_path = [os.path.join(train_folder_path, name) for name in class_names]
audio_path

['C:./dataset/audioonly/labeled/set 1/Train\\belly_pain',
 'C:./dataset/audioonly/labeled/set 1/Train\\discomfort',
 'C:./dataset/audioonly/labeled/set 1/Train\\hungry',
 'C:./dataset/audioonly/labeled/set 1/Train\\tired']

In [5]:
bpain_audio = glob.glob(os.path.join(audio_path[0], '*.wav'))
discomf_audio = glob.glob(os.path.join(audio_path[1], '*.wav'))
hungry_audio = glob.glob(os.path.join(audio_path[2], '*.wav'))
tired_audio = glob.glob(os.path.join(audio_path[3], '*.wav'))

In [6]:
audio_path_byclass = {
    'bpain': bpain_audio,
    'discomf': discomf_audio,
    'hungry': hungry_audio,
    'tired': tired_audio
}

In [7]:
class_names_test = next(os.walk(test_folder_path))[1]
class_names_test = class_names_test
class_names_test

['bellypain', 'discomfort', 'hungry', 'tired']

In [8]:
audio_path_test = [os.path.join(test_folder_path, name) for name in class_names_test]
audio_path_test

['C:./dataset/audioonly/labeled/set 1/Test\\bellypain',
 'C:./dataset/audioonly/labeled/set 1/Test\\discomfort',
 'C:./dataset/audioonly/labeled/set 1/Test\\hungry',
 'C:./dataset/audioonly/labeled/set 1/Test\\tired']

In [9]:
bpain_audio_test = glob.glob(os.path.join(audio_path_test[0], '*.wav'))
discomf_audio_test = glob.glob(os.path.join(audio_path_test[1], '*.wav'))
hungry_audio_test = glob.glob(os.path.join(audio_path_test[2], '*.wav'))
tired_audio_test = glob.glob(os.path.join(audio_path_test[3], '*.wav'))

In [10]:
audio_testnum_byclass = {
    'bpain': bpain_audio_test,
    'discomf': discomf_audio_test,
    'hungry': hungry_audio_test,
    'tired': tired_audio_test
}

## Load Audio

In [11]:
# Replace 'your_audio_file.mp3' with the path to your audio file
load_audio = {}
for name, pathlst in audio_path_byclass.items():
    loaded = []
    for path in pathlst:
        audio, sr = librosa.load(path, sr=16000, mono=True, duration=5.2)
        loaded.append(audio)
    load_audio[name] = loaded

In [12]:
# Replace 'your_audio_file.mp3' with the path to your audio file
mfcc_audio = {}
for name, pathlst in audio_path_byclass.items():
    mfccs = []
    for path in pathlst:
        audio, sr = librosa.load(path, sr=16000, mono=True, duration=5.0)
        mfcc = librosa.feature.mfcc(y=audio, sr=16000, n_mfcc=10)
        mfccs.append(mfcc)
    mfcc_audio[name] = mfccs

In [13]:
import collections

shape = {}

for class_name, lst in mfcc_audio.items():
    for arr in lst:
        shapes = [arr.shape for arr in lst]
        shapes_count = collections.Counter(shapes)
        shape[f'{class_name} mfcc shape counts'] = shapes_count

In [14]:
shape

{'bpain mfcc shape counts': Counter({(10, 157): 16}),
 'discomf mfcc shape counts': Counter({(10, 157): 64}),
 'hungry mfcc shape counts': Counter({(10, 157): 100}),
 'tired mfcc shape counts': Counter({(10, 157): 24})}

## Load Model

In [15]:
# Load the model.
import tensorflow_hub as hub
model = hub.load('https://tfhub.dev/google/yamnet/1')

In [16]:
# Run the model, check the output.
feature = {}
for name, arr in load_audio.items():
    # Initialize dictionaries for each class if not already existing
    if name not in feature:
        feature[name] = {'scores': [], 'embeddings': []}
    
    for wav in arr:
        scores, embeddings, spectrogram = model(wav)
        
        # Append the results to the respective lists in the dictionary
        feature[name]['scores'].append(np.array(scores))
        feature[name]['embeddings'].append(np.array(embeddings))

In [17]:
shape_feat = {}

for class_name in feature.keys():
    for cat, lst in feature[class_name].items():
        shapes = [arr.shape for arr in lst]
        shapes_count = collections.Counter(shapes)
        shape_feat[f'{class_name} {cat} shape counts'] = shapes_count

In [18]:
shape_feat

{'bpain scores shape counts': Counter({(10, 521): 16}),
 'bpain embeddings shape counts': Counter({(10, 1024): 16}),
 'discomf scores shape counts': Counter({(10, 521): 64}),
 'discomf embeddings shape counts': Counter({(10, 1024): 64}),
 'hungry scores shape counts': Counter({(10, 521): 100}),
 'hungry embeddings shape counts': Counter({(10, 1024): 100}),
 'tired scores shape counts': Counter({(10, 521): 24}),
 'tired embeddings shape counts': Counter({(10, 1024): 24})}

## Padding

In [19]:
from statistics import mean

temp = [521, 1024, 157]
avg = int(mean(temp))

In [20]:
avg

567

In [21]:
feature

{'bpain': {'scores': [array([[1.8923607e-01, 8.1816152e-02, 7.4701820e-04, ..., 3.4406348e-05,
           1.4851378e-05, 9.7579046e-12],
          [7.5619765e-02, 9.7882688e-02, 2.3008314e-04, ..., 1.2285819e-05,
           1.0424800e-05, 1.7672847e-12],
          [5.5066068e-02, 2.7638804e-02, 2.3326324e-03, ..., 1.1782972e-02,
           1.0509128e-03, 2.2492759e-06],
          ...,
          [1.8726854e-02, 6.9446499e-03, 1.0956227e-03, ..., 7.8630704e-04,
           1.0553618e-03, 2.0163442e-08],
          [2.6560312e-03, 1.9577043e-03, 3.6567948e-05, ..., 1.1392930e-06,
           9.2660997e-07, 2.1578300e-13],
          [6.3905009e-04, 1.9090835e-04, 7.2224864e-08, ..., 3.1179662e-10,
           3.2292671e-10, 2.3303013e-21]], dtype=float32),
   array([[8.89693387e-03, 6.46073022e-04, 4.13316684e-06, ...,
           1.54326437e-04, 2.28323101e-04, 2.45415026e-06],
          [3.61305773e-02, 4.11886275e-02, 4.53640823e-04, ...,
           4.14526265e-04, 1.37799099e-04, 1.65135816

In [22]:
mfcc_audio

{'bpain': [array([[-702.9974  , -702.9974  , -702.9974  , ..., -276.34793 ,
          -267.16507 , -255.30714 ],
         [   0.      ,    0.      ,    0.      , ...,  132.0378  ,
           139.16156 ,  120.704735],
         [   0.      ,    0.      ,    0.      , ..., -177.5227  ,
          -182.65895 , -174.4534  ],
         ...,
         [   0.      ,    0.      ,    0.      , ...,  -40.679985,
           -38.34161 ,  -24.799019],
         [   0.      ,    0.      ,    0.      , ...,  -12.531725,
           -11.875511,   -5.576102],
         [   0.      ,    0.      ,    0.      , ...,   16.384293,
            17.668966,   22.335873]], dtype=float32),
  array([[-543.16656  , -565.36835  , -706.7276   , ..., -410.95627  ,
          -405.01205  , -414.52274  ],
         [  94.93184  ,   85.98862  ,   15.233383 , ...,  171.69666  ,
           173.57571  ,  166.84764  ],
         [ -41.115074 ,  -33.18654  ,    3.12191  , ...,  -69.439835 ,
           -70.61903  ,  -62.326843 ],
      

In [23]:
for class_name, lst in mfcc_audio.items():
    feature[class_name]['mfcc'] = lst

In [24]:
shape_feat_3d = {}

for class_name in feature.keys():
    for cat, lst in feature[class_name].items():
        shapes = [arr.shape for arr in lst]
        shapes_count = collections.Counter(shapes)
        shape_feat_3d[f'{class_name} {cat} shape counts'] = shapes_count

In [25]:
shape_feat_3d

{'bpain scores shape counts': Counter({(10, 521): 16}),
 'bpain embeddings shape counts': Counter({(10, 1024): 16}),
 'bpain mfcc shape counts': Counter({(10, 157): 16}),
 'discomf scores shape counts': Counter({(10, 521): 64}),
 'discomf embeddings shape counts': Counter({(10, 1024): 64}),
 'discomf mfcc shape counts': Counter({(10, 157): 64}),
 'hungry scores shape counts': Counter({(10, 521): 100}),
 'hungry embeddings shape counts': Counter({(10, 1024): 100}),
 'hungry mfcc shape counts': Counter({(10, 157): 100}),
 'tired scores shape counts': Counter({(10, 521): 24}),
 'tired embeddings shape counts': Counter({(10, 1024): 24}),
 'tired mfcc shape counts': Counter({(10, 157): 24})}

In [26]:
reshaped = {}

for class_name, categories in feature.items():
    for cat, lst in categories.items():
        temp = []
        for arr in lst:
            h, w = arr.shape
            size = h * w

            if size == 5210 or size == 1570:
                padded = np.pad(arr, pad_width=((0, 0), (0, 1024 - w)), mode='constant', constant_values=0)
                temp.append(padded)
            elif size == 10240:
                temp.append(arr)
        
        reshaped[f'{class_name} {cat}'] = temp

In [27]:
reshaped

{'bpain scores': [array([[1.8923607e-01, 8.1816152e-02, 7.4701820e-04, ..., 0.0000000e+00,
          0.0000000e+00, 0.0000000e+00],
         [7.5619765e-02, 9.7882688e-02, 2.3008314e-04, ..., 0.0000000e+00,
          0.0000000e+00, 0.0000000e+00],
         [5.5066068e-02, 2.7638804e-02, 2.3326324e-03, ..., 0.0000000e+00,
          0.0000000e+00, 0.0000000e+00],
         ...,
         [1.8726854e-02, 6.9446499e-03, 1.0956227e-03, ..., 0.0000000e+00,
          0.0000000e+00, 0.0000000e+00],
         [2.6560312e-03, 1.9577043e-03, 3.6567948e-05, ..., 0.0000000e+00,
          0.0000000e+00, 0.0000000e+00],
         [6.3905009e-04, 1.9090835e-04, 7.2224864e-08, ..., 0.0000000e+00,
          0.0000000e+00, 0.0000000e+00]], dtype=float32),
  array([[8.89693387e-03, 6.46073022e-04, 4.13316684e-06, ...,
          0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
         [3.61305773e-02, 4.11886275e-02, 4.53640823e-04, ...,
          0.00000000e+00, 0.00000000e+00, 0.00000000e+00],
         [2.9

In [28]:
shape_count = {}

for class_name, lst in reshaped.items():
    shapes = [arr.shape for arr in lst]
    shapes_count = collections.Counter(shapes)
    shape_count[f'{class_name} shape counts'] = shapes_count

In [29]:
shape_count

{'bpain scores shape counts': Counter({(10, 1024): 16}),
 'bpain embeddings shape counts': Counter({(10, 1024): 16}),
 'bpain mfcc shape counts': Counter({(10, 1024): 16}),
 'discomf scores shape counts': Counter({(10, 1024): 64}),
 'discomf embeddings shape counts': Counter({(10, 1024): 64}),
 'discomf mfcc shape counts': Counter({(10, 1024): 64}),
 'hungry scores shape counts': Counter({(10, 1024): 100}),
 'hungry embeddings shape counts': Counter({(10, 1024): 100}),
 'hungry mfcc shape counts': Counter({(10, 1024): 100}),
 'tired scores shape counts': Counter({(10, 1024): 24}),
 'tired embeddings shape counts': Counter({(10, 1024): 24}),
 'tired mfcc shape counts': Counter({(10, 1024): 24})}

In [30]:
# Function to combine the data for each category into a 3-channel format
def combine_data(category):
    score_key = f'{category} scores'
    embeddings_key = f'{category} embeddings'
    mfcc_key = f'{category} mfcc'
    return np.stack((reshaped[score_key], reshaped[embeddings_key], reshaped[mfcc_key]), axis=-1)

In [31]:
# Combine the data for each category
combined_data = {category: combine_data(category) for category in ['bpain', 'discomf', 'hungry', 'tired']}

# Display the shapes of the combined data for verification
for category, data in combined_data.items():
    print(f"{category}: {data.shape}")

# Return the shape of the bpain data
combined_data['bpain'].shape

bpain: (16, 10, 1024, 3)
discomf: (64, 10, 1024, 3)
hungry: (100, 10, 1024, 3)
tired: (24, 10, 1024, 3)


(16, 10, 1024, 3)

In [32]:
from sklearn.model_selection import train_test_split

# Labels for each category
labels = {
    'bpain': 0,
    'discomf': 1,
    'hungry': 2,
    'tired': 3
}

# Combine all data and labels
all_data = []
all_labels = []
for category, data in combined_data.items():
    all_data.append(data)
    all_labels.append(np.full((data.shape[0],), labels[category]))

# Convert lists to numpy arrays
all_data = np.concatenate(all_data)
all_labels = np.concatenate(all_labels)

# Shuffle the data and labels in unison
shuffle_indices = np.random.permutation(np.arange(len(all_labels)))
all_data_shuffled = all_data[shuffle_indices]
all_labels_shuffled = all_labels[shuffle_indices]

# Split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(all_data_shuffled, all_labels_shuffled, test_size=0.2, random_state=42)

# Verifying the shapes of the resulting arrays
x_train.shape, y_train.shape, x_test.shape, y_test.shape

((163, 10, 1024, 3), (163,), (41, 10, 1024, 3), (41,))

## Model Training

In [55]:
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, BatchNormalization, Flatten, Dense, MaxPooling2D, LeakyReLU, Dropout
from tensorflow.keras.regularizers import l2, l1

# Define the input shape
input_shape = (10, 1024, 3)

# Create a sequential model
model = Sequential()

# Add a convolutional layer with LeakyReLU activation
model.add(Conv2D(filters=32, kernel_size=(2, 2), input_shape=input_shape))
model.add(LeakyReLU(alpha=0.022))  # You can adjust the alpha value as needed
model.add(BatchNormalization())

# Add another convolutional layer with LeakyReLU activation
model.add(Conv2D(filters=64, kernel_size=(2, 2)))
model.add(LeakyReLU(alpha=0.022))  # Alpha value can be adjusted
model.add(BatchNormalization())

model.add(Conv2D(filters=128, kernel_size=(2, 2)))
model.add(LeakyReLU(alpha=0.022))  # Alpha value can be adjusted
model.add(BatchNormalization())

# Flatten the output of the convolutional layers
model.add(Flatten())

# Add a dense layer with LeakyReLU activation
model.add(Dense(units=32, kernel_regularizer=l2(0.001)))
model.add(LeakyReLU(alpha=0.022)) # Adjusting alpha

model.add(Dense(units=16))
model.add(LeakyReLU(alpha=0.022))

# Add the output layer
model.add(Dense(units=4, activation='softmax'))  # Assuming 4 classes as per your data

# Compile the model
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Model summary
model.summary()

Model: "sequential_9"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d_27 (Conv2D)          (None, 9, 1023, 32)       416       
                                                                 
 leaky_re_lu_46 (LeakyReLU)  (None, 9, 1023, 32)       0         
                                                                 
 batch_normalization_27 (Bat  (None, 9, 1023, 32)      128       
 chNormalization)                                                
                                                                 
 conv2d_28 (Conv2D)          (None, 8, 1022, 64)       8256      
                                                                 
 leaky_re_lu_47 (LeakyReLU)  (None, 8, 1022, 64)       0         
                                                                 
 batch_normalization_28 (Bat  (None, 8, 1022, 64)      256       
 chNormalization)                                     

In [56]:
history = model.fit(x_train,
                    y_train,
                    validation_data=(x_test, y_test),
                    epochs=30,
                    batch_size=4)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


In [43]:
model.evaluate(x_test, y_test)



[2.0135486125946045, 0.6341463327407837]