In [1]:
import collections
import glob
import os
from statistics import mean

import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf
from IPython.display import Audio

In [2]:
# Root folder of the labelled audio dataset; the later cells list its sub-folders
# and use them as class names.
# NOTE(review): 'C:../' has no separator after the drive letter, so on Windows it
# presumably resolves against the current directory of drive C rather than the
# drive root - confirm this is intended before sharing the notebook.
data_dir = r'C:../dataset/audioonly/labeled/original_dataset'
# Sanity check, shown as the cell output: does the dataset folder exist?
os.path.exists(data_dir)

True

In [3]:
# The immediate sub-folders of data_dir are the class labels.
# os.walk reports directory names in arbitrary (filesystem-dependent) order, so
# sort them: later cells address classes by position and need a stable order.
class_names = sorted(next(os.walk(data_dir))[1])
class_names

['belly_pain', 'discomfort', 'hungry', 'tired']

In [4]:
# Build the folder path of every class, in the same order as class_names.
audio_path = []
for class_dir in class_names:
    audio_path.append(os.path.join(data_dir, class_dir))
audio_path

['C:../dataset/audioonly/labeled/original_dataset\\belly_pain',
 'C:../dataset/audioonly/labeled/original_dataset\\discomfort',
 'C:../dataset/audioonly/labeled/original_dataset\\hungry',
 'C:../dataset/audioonly/labeled/original_dataset\\tired']

In [5]:
def _list_wavs(class_dir_name):
    """Return the sorted .wav file paths found directly inside one class folder.

    The folder is looked up by name under ``data_dir`` instead of by its position
    in ``audio_path``, so an extra or re-ordered folder cannot silently attach
    the wrong recordings to a class.

    Raises:
        FileNotFoundError: if the class folder does not exist.
    """
    class_dir = os.path.join(data_dir, class_dir_name)
    if not os.path.isdir(class_dir):
        raise FileNotFoundError(f'class folder not found: {class_dir}')
    # glob returns matches in arbitrary order; sort for reproducible runs.
    return sorted(glob.glob(os.path.join(class_dir, '*.wav')))


bpain_audio = _list_wavs('belly_pain')
discomf_audio = _list_wavs('discomfort')
hungry_audio = _list_wavs('hungry')
tired_audio = _list_wavs('tired')

In [6]:
# Map a short class key to the list of .wav paths collected for that class.
audio_path_byclass = {}
audio_path_byclass['bpain'] = bpain_audio
audio_path_byclass['discomf'] = discomf_audio
audio_path_byclass['hungry'] = hungry_audio
audio_path_byclass['tired'] = tired_audio

## **Convert to Mel Spectrogram**

In [7]:
def _mel_from_file(wav_path):
    """Load one audio file and return its mel spectrogram.

    The file is read with ``sr=None`` and ``duration=5.0``, and the sample rate
    returned by the loader is passed on to the spectrogram computation.
    """
    samples, sample_rate = librosa.load(wav_path, sr=None, duration=5.0)
    return librosa.feature.melspectrogram(y=samples, sr=sample_rate)


# Class key -> list of mel spectrograms, one per audio file of that class.
mel_spec_convert = {
    label: [_mel_from_file(wav_path) for wav_path in wav_paths]
    for label, wav_paths in audio_path_byclass.items()
}

In [8]:
# Tally, for every class, how many spectrograms have each array shape.
# (`collections` is imported in the notebook's top import cell.)
shape = {}

for class_name, lst in mel_spec_convert.items():
    shape[f'{class_name} mel spec shape counts'] = collections.Counter(
        arr.shape for arr in lst
    )

In [9]:
# Display the per-class tally of spectrogram shapes as the cell output.
shape

{'bpain mel spec shape counts': Counter({(128, 469): 31, (128, 79): 16}),
 'discomf mel spec shape counts': Counter({(128, 79): 35,
          (128, 431): 28,
          (128, 430): 1}),
 'hungry mel spec shape counts': Counter({(128, 79): 382,
          (128, 431): 26,
          (128, 430): 2}),
 'tired mel spec shape counts': Counter({(128, 79): 24, (128, 469): 1})}

In [12]:
# Derive one common spectrogram width (second shape dimension) for all classes.
# (`mean` is imported from `statistics` in the notebook's top import cell.)
#
# For each class, average the *distinct* widths seen in that class, then average
# those per-class means and truncate to an int.
# NOTE(review): both averages are unweighted - the Counter values (how many
# spectrograms have each width) are ignored. Confirm that is intended.
avg_lst = []

# The loop variable is deliberately not called `dict`: that would replace the
# builtin for the remainder of the kernel session.
for cat, shape_counts in shape.items():
    widths = [shape_tuple[1] for shape_tuple in shape_counts.keys()]
    avg_lst.append(mean(widths))

avg_all = int(mean(avg_lst))
print(avg_all)

293


## **Mel Spectrogram Padding and Reshape**

In [16]:
# Bring every spectrogram to the same width (second dimension) of `avg_all`:
# narrower arrays are zero-padded on both sides, wider ones are cut off on the right.
# The width is read from `avg_all` everywhere instead of repeating its current
# value as a literal, so padding and truncation stay consistent if it changes.
target_width = avg_all
reshaped = {}

for class_name, lst in mel_spec_convert.items():
    resized = []
    for arr in lst:
        width = arr.shape[1]

        if width < target_width:
            missing = target_width - width
            # Split the padding as evenly as possible; the extra column, if any,
            # goes on the right.
            left = missing // 2
            right = missing - left
            resized.append(
                np.pad(arr, pad_width=((0, 0), (left, right)), mode='constant', constant_values=0)
            )
        else:
            # Keep only the first `target_width` columns.
            resized.append(arr[:, :target_width])

    reshaped[f'{class_name} mel spec'] = resized

In [17]:
# Verify the resize step: tally the array shapes per class after padding/truncation.
# The keys of `reshaped` already end in "mel spec", so only " shape counts" is
# appended here; repeating "mel spec" produced labels such as
# "bpain mel spec mel spec shape counts".
reshape_count = {}

for class_label, lst in reshaped.items():
    reshape_count[f'{class_label} shape counts'] = collections.Counter(
        arr.shape for arr in lst
    )

In [18]:
# Display the per-class shape tally after resizing as the cell output.
reshape_count

{'bpain mel spec mel spec shape counts': Counter({(128, 293): 47}),
 'discomf mel spec mel spec shape counts': Counter({(128, 293): 64}),
 'hungry mel spec mel spec shape counts': Counter({(128, 293): 410}),
 'tired mel spec mel spec shape counts': Counter({(128, 293): 25})}