In [2]:
import tensorflow as tf
import numpy as np
import librosa
import os
from pydub import AudioSegment
from pydub.utils import make_chunks

In [5]:
# --- Configuration -----------------------------------------------------------
DATA_PATH = "../../data/11chunks/"
SAMPLE_RATE = 48000
MELSPECT_OR_MFCC = "melspect" # Choose "melspect" or "mfcc"; this selects which feature type is extracted below.
N_MFCC = 25
CHUNK_LENGTH_MS = 1000 # 1 second

mapping = ["none", "whine"]
audio = []

# Validate the feature selector once, up front. Calling exit() inside a notebook
# shuts the kernel down instead of reporting the problem, so raise instead.
if MELSPECT_OR_MFCC not in ("melspect", "mfcc"):
    raise ValueError('Set MELSPECT_OR_MFCC to either "melspect" or "mfcc".')

# --- Feature extraction ------------------------------------------------------
# Walk DATA_PATH and append one feature matrix (as nested lists) per file to
# `audio`. Directory and file names are sorted so that the row order of `audio`
# (and therefore of any predictions made from it) is reproducible; os.walk
# itself makes no ordering guarantee.
for dirpath, dirnames, filenames in os.walk(DATA_PATH):
    dirnames.sort()  # in-place sort steers os.walk's traversal order
    for j, file in enumerate(sorted(filenames)):
        file_path = os.path.join(dirpath, file)
        signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

        # Force every clip to exactly SAMPLE_RATE samples so that all feature
        # matrices share one shape: zero-pad short clips, truncate long ones.
        if len(signal) < SAMPLE_RATE:
            signal = np.pad(signal, (0, SAMPLE_RATE - len(signal)))
        elif len(signal) > SAMPLE_RATE:
            print(f"Warning: truncating {file_path} from {len(signal)} to {SAMPLE_RATE} samples.")
            signal = signal[:SAMPLE_RATE]

        if MELSPECT_OR_MFCC == "melspect":
            # NOTE(review): `sr` is not passed here, so librosa falls back to its
            # default of 22050 Hz even though the audio was loaded at SAMPLE_RATE.
            # Left unchanged on purpose: the saved model may have been trained on
            # features computed the same way -- confirm against the training
            # notebook before adding sr=SAMPLE_RATE.
            features = librosa.feature.melspectrogram(y=signal)
        else:
            # MFCCs are transposed before being stored.
            features = librosa.feature.mfcc(y=signal, sr=SAMPLE_RATE, n_mfcc=N_MFCC).T
        audio.append(features.tolist())

        # Lightweight progress report: every 25th file of each directory.
        if j % 25 == 0:
            print(f"Working file path: {file_path}")

Working file path: ../../data/11chunks/11chunk00000.wav
Working file path: ../../data/11chunks/11chunk00025.wav
Working file path: ../../data/11chunks/11chunk00050.wav
Working file path: ../../data/11chunks/11chunk00075.wav
Working file path: ../../data/11chunks/11chunk00100.wav
Working file path: ../../data/11chunks/11chunk00125.wav
Working file path: ../../data/11chunks/11chunk00150.wav
Working file path: ../../data/11chunks/11chunk00175.wav
Working file path: ../../data/11chunks/11chunk00200.wav
Working file path: ../../data/11chunks/11chunk00225.wav
Working file path: ../../data/11chunks/11chunk00250.wav
Working file path: ../../data/11chunks/11chunk00275.wav
Working file path: ../../data/11chunks/11chunk00300.wav


In [3]:
# Restore the previously saved Keras model from disk.
model_path = 'whine-cry.model'
model = tf.keras.models.load_model(model_path)

In [6]:
# Convert the collected features into a single float32 array before predicting.
# Passing a (nested) Python list is ambiguous to Keras, which can interpret a
# list as several separate model inputs; an explicit ndarray is unambiguous and
# also fails loudly here if the per-file feature matrices are ragged.
audio_batch = np.asarray(audio, dtype=np.float32)
if audio_batch.shape[0] == 0:
    raise ValueError("No audio features to predict on; `audio` is empty.")

predictions = model.predict(audio_batch)
print(predictions)

[[2.0910708e-04]
 [3.6437982e-01]
 [6.7493193e-02]
 [4.0873628e-02]
 [1.5918334e-01]
 [4.4086263e-01]
 [2.7891807e-02]
 [2.8217124e-04]
 [4.4086263e-01]
 [3.7802666e-02]
 [9.4306041e-03]
 [1.1843972e-01]
 [1.0234042e-03]
 [2.9712392e-04]
 [2.0425759e-02]
 [6.1129604e-04]
 [9.9814497e-03]
 [3.2841702e-04]
 [1.5003163e-03]
 [6.3767447e-04]
 [4.4086263e-01]
 [4.4086263e-01]
 [1.1181854e-01]
 [7.7895483e-04]
 [5.3884910e-04]
 [3.9104722e-02]
 [4.4086263e-01]
 [4.4086263e-01]
 [4.4086263e-01]
 [4.4086263e-01]
 [4.4086263e-01]
 [1.6477840e-01]
 [4.4086263e-01]
 [4.4086263e-01]
 [7.0709898e-03]
 [1.9597496e-04]
 [3.6637455e-02]
 [4.1490853e-01]
 [4.4086263e-01]
 [4.4086263e-01]
 [7.9523779e-02]
 [4.4086263e-01]
 [4.4086263e-01]
 [4.7287357e-04]
 [1.4548412e-03]
 [5.7432283e-02]
 [4.4086263e-01]
 [4.4086263e-01]
 [1.9528668e-02]
 [1.9657701e-02]
 [4.6970419e-04]
 [4.0186089e-02]
 [3.2377836e-01]
 [1.8331786e-03]
 [4.4086263e-01]
 [4.4086263e-01]
 [2.8473839e-01]
 [9.8893419e-04]
 [4.4086263e-0