In [46]:
import numpy as np
import pandas as pd
import glob
import os
import matplotlib.pyplot as plt
import librosa
from IPython.display import Audio
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.utils import shuffle
import tensorflow as tf

In [47]:
train_folder_path = r'C:../dataset/audioonly/labeled/set 2/Train'
# test_folder_path = r'C:../dataset/audioonly/labeled/set 1/Test'

In [49]:
class_names = next(os.walk(train_folder_path))[1]
class_names = class_names[1:]
class_names

['belly_pain', 'discomfort', 'hungry', 'tired']

In [50]:
audio_path = [os.path.join(train_folder_path, name) for name in class_names]
audio_path

['C:../dataset/audioonly/labeled/set 2/Train\\belly_pain',
 'C:../dataset/audioonly/labeled/set 2/Train\\discomfort',
 'C:../dataset/audioonly/labeled/set 2/Train\\hungry',
 'C:../dataset/audioonly/labeled/set 2/Train\\tired']

In [51]:
bpain_audio = glob.glob(os.path.join(audio_path[0], '*.wav'))
discomf_audio = glob.glob(os.path.join(audio_path[1], '*.wav'))
hungry_audio = glob.glob(os.path.join(audio_path[2], '*.wav'))
tired_audio = glob.glob(os.path.join(audio_path[3], '*.wav'))

In [52]:
audio_path_byclass = {
    'bpain': bpain_audio,
    'discomf': discomf_audio,
    'hungry': hungry_audio,
    'tired': tired_audio
}

In [53]:
class_names_test = next(os.walk(test_folder_path))[1]
class_names_test = class_names_test
class_names_test

['bellypain', 'discomfort', 'hungry', 'tired']

In [10]:
audio_path_test = [os.path.join(test_folder_path, name) for name in class_names_test]
audio_path_test

['C:../dataset/audioonly/labeled/set 1/Test\\bellypain',
 'C:../dataset/audioonly/labeled/set 1/Test\\discomfort',
 'C:../dataset/audioonly/labeled/set 1/Test\\hungry',
 'C:../dataset/audioonly/labeled/set 1/Test\\tired']

In [11]:
bpain_audio_test = glob.glob(os.path.join(audio_path_test[0], '*.wav'))
discomf_audio_test = glob.glob(os.path.join(audio_path_test[1], '*.wav'))
hungry_audio_test = glob.glob(os.path.join(audio_path_test[2], '*.wav'))
tired_audio_test = glob.glob(os.path.join(audio_path_test[3], '*.wav'))

In [12]:
audio_testnum_byclass = {
    'bpain': bpain_audio_test,
    'discomf': discomf_audio_test,
    'hungry': hungry_audio_test,
    'tired': tired_audio_test
}

## Load Audio

In [54]:
# Replace 'your_audio_file.mp3' with the path to your audio file
load_audio = {}
for name, pathlst in audio_path_byclass.items():
    loaded = []
    for path in pathlst:
        audio, sr = librosa.load(path, sr=16000, mono=True, duration=5.2)
        loaded.append(audio)
    load_audio[name] = loaded

In [55]:
# check if all values between -1.0 and -1.0
for name, arr in load_audio.items():
    for val in arr:
        if np.all((-1 <= val) & (val <= 1)):
            print(f'{name} arr is between -1 and 1')

bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr is between -1 and 1
bpain arr 

## Load Model

In [56]:
# Load the model.
import tensorflow_hub as hub
model = hub.load('https://tfhub.dev/google/yamnet/1')

In [57]:
# Run the model, check the output.
feature = {}
for name, arr in load_audio.items():
    # Initialize dictionaries for each class if not already existing
    if name not in feature:
        feature[name] = {'scores': [], 'embeddings': [], 'spectrograms': []}
    
    for wav in arr:
        scores, embeddings, spectrogram = model(wav)
        
        # Append the results to the respective lists in the dictionary
        feature[name]['scores'].append(np.array(scores))
        feature[name]['embeddings'].append(np.array(embeddings))
        feature[name]['spectrograms'].append(np.array(spectrogram))

In [58]:
import collections

shape = {}

for class_name in feature.keys():
    for cat, lst in feature[class_name].items():
        shapes = [arr.shape for arr in lst]
        shapes_count = collections.Counter(shapes)
        shape[f'{class_name} {cat} shape counts'] = shapes_count

In [59]:
shape

{'bpain scores shape counts': Counter({(10, 521): 69}),
 'bpain embeddings shape counts': Counter({(10, 1024): 69}),
 'bpain spectrograms shape counts': Counter({(528, 64): 69}),
 'discomf scores shape counts': Counter({(10, 521): 94}),
 'discomf embeddings shape counts': Counter({(10, 1024): 94}),
 'discomf spectrograms shape counts': Counter({(528, 64): 94}),
 'hungry scores shape counts': Counter({(10, 521): 100}),
 'hungry embeddings shape counts': Counter({(10, 1024): 100}),
 'hungry spectrograms shape counts': Counter({(528, 64): 100}),
 'tired scores shape counts': Counter({(10, 521): 69}),
 'tired embeddings shape counts': Counter({(10, 1024): 69}),
 'tired spectrograms shape counts': Counter({(528, 64): 69})}

## nd array padding
- 각 클래스 카테고리마다 평균 row를 구한 후 padding 해준다

In [60]:
shape['bpain scores shape counts'].items()

dict_items([((10, 521), 69)])

In [61]:
from statistics import mean

temp = []

for class_cat, counter in shape.items():
    for shape, count in counter.items():
        h, w = shape
        num_elements = h * w
        temp.append(num_elements)

In [62]:
avg = mean(temp)

In [63]:
avg

16414

In [64]:
arr = np.random.randint(5, size = (2,5))
arr

array([[4, 4, 2, 0, 1],
       [2, 0, 2, 1, 4]])

In [65]:
np.pad(arr, pad_width=((2,1), (1,2)), mode='constant', constant_values = 0)

array([[0, 0, 0, 0, 0, 0, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0],
       [0, 4, 4, 2, 0, 1, 0, 0],
       [0, 2, 0, 2, 1, 4, 0, 0],
       [0, 0, 0, 0, 0, 0, 0, 0]])

In [66]:
feature

{'bpain': {'scores': [array([[1.8923607e-01, 8.1816152e-02, 7.4701820e-04, ..., 3.4406348e-05,
           1.4851378e-05, 9.7579046e-12],
          [7.5619765e-02, 9.7882688e-02, 2.3008314e-04, ..., 1.2285819e-05,
           1.0424800e-05, 1.7672847e-12],
          [5.5066068e-02, 2.7638804e-02, 2.3326324e-03, ..., 1.1782972e-02,
           1.0509128e-03, 2.2492759e-06],
          ...,
          [1.8726854e-02, 6.9446499e-03, 1.0956227e-03, ..., 7.8630704e-04,
           1.0553618e-03, 2.0163442e-08],
          [2.6560312e-03, 1.9577043e-03, 3.6567948e-05, ..., 1.1392930e-06,
           9.2660997e-07, 2.1578300e-13],
          [6.3905009e-04, 1.9090835e-04, 7.2224864e-08, ..., 3.1179662e-10,
           3.2292671e-10, 2.3303013e-21]], dtype=float32),
   array([[8.89693387e-03, 6.46073022e-04, 4.13316684e-06, ...,
           1.54326437e-04, 2.28323101e-04, 2.45415026e-06],
          [3.61305773e-02, 4.11886275e-02, 4.53640823e-04, ...,
           4.14526265e-04, 1.37799099e-04, 1.65135816

In [67]:
reshaped = {}

for class_name, categories in feature.items():
    for cat, lst in categories.items():
        temp = []
        for arr in lst:
            h, w = arr.shape
            flat_arr = arr.flatten()
            size = h * w

            if size == 5210 or size == 10240:
                padded = np.pad(flat_arr, pad_width=(0, avg - size), mode='constant', constant_values=0)
                temp.append(padded)
            elif size == 33792:
                trunc_arr = flat_arr[:avg]
                temp.append(trunc_arr)
        
        reshaped[f'{class_name} {cat}'] = temp

In [68]:
reshaped

{'bpain scores': [array([0.18923607, 0.08181615, 0.00074702, ..., 0.        , 0.        ,
         0.        ], dtype=float32),
  array([8.896934e-03, 6.460730e-04, 4.133167e-06, ..., 0.000000e+00,
         0.000000e+00, 0.000000e+00], dtype=float32),
  array([0.47045875, 0.05031064, 0.00105331, ..., 0.        , 0.        ,
         0.        ], dtype=float32),
  array([0.00965345, 0.00096967, 0.00051121, ..., 0.        , 0.        ,
         0.        ], dtype=float32),
  array([0.19759008, 0.0307804 , 0.00051608, ..., 0.        , 0.        ,
         0.        ], dtype=float32),
  array([0.03303082, 0.00742492, 0.00103915, ..., 0.        , 0.        ,
         0.        ], dtype=float32),
  array([0.14313875, 0.26384673, 0.00072235, ..., 0.        , 0.        ,
         0.        ], dtype=float32),
  array([0.03311558, 0.02228799, 0.00158001, ..., 0.        , 0.        ,
         0.        ], dtype=float32),
  array([0.24430196, 0.18956037, 0.002162  , ..., 0.        , 0.        ,
  

In [69]:
shape_count = {}

for class_name, lst in reshaped.items():
    shapes = [arr.shape for arr in lst]
    shapes_count = collections.Counter(shapes)
    shape_count[f'{class_name} shape counts'] = shapes_count

In [70]:
shape_count

{'bpain scores shape counts': Counter({(16414,): 69}),
 'bpain embeddings shape counts': Counter({(16414,): 69}),
 'bpain spectrograms shape counts': Counter({(16414,): 69}),
 'discomf scores shape counts': Counter({(16414,): 94}),
 'discomf embeddings shape counts': Counter({(16414,): 94}),
 'discomf spectrograms shape counts': Counter({(16414,): 94}),
 'hungry scores shape counts': Counter({(16414,): 100}),
 'hungry embeddings shape counts': Counter({(16414,): 100}),
 'hungry spectrograms shape counts': Counter({(16414,): 100}),
 'tired scores shape counts': Counter({(16414,): 69}),
 'tired embeddings shape counts': Counter({(16414,): 69}),
 'tired spectrograms shape counts': Counter({(16414,): 69})}

In [71]:
# Initialize a dictionary to store the concatenated arrays
concatenated_arrays = {}

# Iterate over each class
for class_prefix in ['bpain', 'discomf', 'hungry', 'tired']:
    scores = reshaped[f'{class_prefix} scores']
    embeddings = reshaped[f'{class_prefix} embeddings']
    spectrograms = reshaped[f'{class_prefix} spectrograms']

    # Ensure all lists are the same length
    if len(scores) == len(embeddings) == len(spectrograms):
        concatenated_arrays[class_prefix] = []

        # Concatenate corresponding arrays from each category
        for i in range(len(scores)):
            concatenated = np.concatenate([scores[i], embeddings[i], spectrograms[i]])
            concatenated_arrays[class_prefix].append(concatenated)
    else:
        print(f"Warning: Mismatch in lengths for {class_prefix} class")

In [72]:
concatenated_arrays

{'bpain': [array([ 1.8923607e-01,  8.1816152e-02,  7.4701820e-04, ...,
         -6.1820951e+00, -5.7947135e+00, -5.4088645e+00], dtype=float32),
  array([ 8.8969339e-03,  6.4607302e-04,  4.1331668e-06, ...,
         -3.3297453e+00, -3.8977821e+00, -3.4433036e+00], dtype=float32),
  array([ 4.7045875e-01,  5.0310638e-02,  1.0533088e-03, ...,
         -1.9160380e+00, -1.1971490e+00, -2.7884889e+00], dtype=float32),
  array([ 9.6534491e-03,  9.6966873e-04,  5.1121454e-04, ...,
         -1.5630729e+00, -9.5865834e-01, -1.1787437e+00], dtype=float32),
  array([1.9759008e-01, 3.0780401e-02, 5.1608140e-04, ..., 7.6885039e-01,
         8.8494784e-01, 1.0290134e+00], dtype=float32),
  array([ 3.3030823e-02,  7.4249213e-03,  1.0391541e-03, ...,
         -2.0235295e+00, -2.1984284e+00, -1.9023801e+00], dtype=float32),
  array([ 1.4313875e-01,  2.6384673e-01,  7.2234502e-04, ...,
         -2.3908720e+00, -1.7238010e+00, -1.9984339e+00], dtype=float32),
  array([ 3.3115577e-02,  2.2287989e-02,  1.5

In [73]:
concat_count = {}

for class_name, lst in concatenated_arrays.items():
    shapes = [arr.shape for arr in lst]
    shapes_count = collections.Counter(shapes)
    concat_count[f'{class_name} shape counts'] = shapes_count

In [74]:
concat_count

{'bpain shape counts': Counter({(49242,): 69}),
 'discomf shape counts': Counter({(49242,): 94}),
 'hungry shape counts': Counter({(49242,): 100}),
 'tired shape counts': Counter({(49242,): 69})}

In [75]:
from sklearn.model_selection import train_test_split

# Combine data and labels
data = []
labels = []
for class_name, arrays in concatenated_arrays.items():
    data.extend(arrays)
    labels.extend([class_name] * len(arrays))

# Convert to numpy arrays if not already
data = np.array(data)
labels = np.array(labels)

# Shuffle and split the data into training and testing sets
x_train, x_test, y_train, y_test = train_test_split(data, labels, test_size=0.2, random_state=42)

In [76]:
from sklearn.preprocessing import LabelEncoder

label_encoder = LabelEncoder()
y_train = label_encoder.fit_transform(y_train)
y_test = label_encoder.transform(y_test)

In [77]:
print(x_train.shape)
print(y_train.shape)
print(x_test.shape)
print(y_test.shape)

(265, 49242)
(265,)
(67, 49242)
(67,)


In [91]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, BatchNormalization, Activation

model = Sequential([
    Dense(128, input_shape=(49242,)),  # First hidden layer
    BatchNormalization(),              # Batch normalization for the first hidden layer
    Activation('sigmoid'),                # Activation for the first hidden layer

    Dense(64),                        # Second hidden layer
    BatchNormalization(),              # Batch normalization for the second hidden layer
    Activation('sigmoid'),                # Activation for the second hidden layer

    Dense(32),                         # Third hidden layer
    BatchNormalization(),              # Batch normalization for the third hidden layer
    Activation('sigmoid'),             # Activation for the third hidden layer

    Dense(16),                         # Fourth hidden layer
    BatchNormalization(),              # Batch normalization for the fourth hidden layer
    Activation('sigmoid'),

    Dense(4, activation='softmax')     # Output layer
])

# Compile the model
model.compile(optimizer='adam', 
              loss='sparse_categorical_crossentropy', 
              metrics=['accuracy'])

# Summary of the model
model.summary()

Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_10 (Dense)            (None, 128)               6303104   
                                                                 
 batch_normalization_8 (Batc  (None, 128)              512       
 hNormalization)                                                 
                                                                 
 activation_8 (Activation)   (None, 128)               0         
                                                                 
 dense_11 (Dense)            (None, 64)                8256      
                                                                 
 batch_normalization_9 (Batc  (None, 64)               256       
 hNormalization)                                                 
                                                                 
 activation_9 (Activation)   (None, 64)               

In [92]:
history = model.fit(
    x_train,
    y_train,
    batch_size=4,
    epochs=35,
    validation_data=(x_test, y_test)
)

Epoch 1/35
Epoch 2/35
Epoch 3/35
Epoch 4/35
Epoch 5/35
Epoch 6/35
Epoch 7/35
Epoch 8/35
Epoch 9/35
Epoch 10/35
Epoch 11/35
Epoch 12/35
Epoch 13/35
Epoch 14/35
Epoch 15/35
Epoch 16/35
Epoch 17/35
Epoch 18/35
Epoch 19/35
Epoch 20/35
Epoch 21/35
Epoch 22/35
Epoch 23/35
Epoch 24/35
Epoch 25/35
Epoch 26/35
Epoch 27/35
Epoch 28/35
Epoch 29/35
Epoch 30/35
Epoch 31/35
Epoch 32/35
Epoch 33/35
Epoch 34/35
Epoch 35/35


In [80]:
model.evaluate(x_test, y_test)



[1.30979323387146, 0.5223880410194397]

## Random Forest

In [90]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score

# Create a Random Forest classifier model
model = RandomForestClassifier(n_estimators=75, random_state=42)

# Train the model
model.fit(x_train, y_train)

# Predict the labels for the test set
y_pred = model.predict(x_test)

# Evaluate the accuracy of the model
accuracy = accuracy_score(y_test, y_pred)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 65.67%
