In [6]:
import tensorflow as tf
import numpy as np
import librosa
import os
from pydub import AudioSegment
from pydub.utils import make_chunks

In [31]:
# --- Configuration ---------------------------------------------------------
DATA_PATH = "../../data/11chunks/"
SAMPLE_RATE = 48000
MELSPECT_OR_MFCC = "melspect"  # Choose "melspect" or "mfcc" to select which feature type is extracted.
N_MFCC = 25
CHUNK_LENGTH_MS = 1000  # 1 second

mapping = ["none", "whine"]
audio = []  # one nested-list feature matrix per audio file, in traversal order

# Validate the feature selector once, up front. Raising (instead of calling
# exit()) keeps the notebook kernel alive and produces a normal traceback.
if MELSPECT_OR_MFCC not in ("melspect", "mfcc"):
    raise ValueError('Set MELSPECT_OR_MFCC to either "melspect" or "mfcc".')

# Every clip is forced to exactly this many samples so that all feature
# matrices share one shape and can be stacked into a single batch.
samples_per_chunk = SAMPLE_RATE * CHUNK_LENGTH_MS // 1000

for dirpath, dirnames, filenames in os.walk(DATA_PATH):
    # os.walk yields entries in arbitrary, OS-dependent order. Sorting both
    # lists makes the order of `audio` (and therefore of the prediction rows)
    # deterministic; sorting `dirnames` in place also fixes the descent order.
    dirnames.sort()
    for j, file in enumerate(sorted(filenames)):
        file_path = os.path.join(dirpath, file)
        signal, sr = librosa.load(file_path, sr=SAMPLE_RATE)

        n_samples = len(signal)
        if n_samples < samples_per_chunk:
            # Zero-pad short clips at the end.
            signal = np.pad(signal, (0, samples_per_chunk - n_samples))
        elif n_samples > samples_per_chunk:
            # The original shape check lived inside the padding branch and
            # could never fire; over-long clips are handled explicitly here.
            print(f"Warning: {file_path} has {n_samples} samples; "
                  f"truncating to {samples_per_chunk}.")
            signal = signal[:samples_per_chunk]

        if MELSPECT_OR_MFCC == "melspect":
            # NOTE(review): no `sr` is passed, so librosa falls back to its
            # default sample rate for the mel filter bank even though the
            # signal was loaded at SAMPLE_RATE, and the result is not
            # transposed the way the MFCC branch is. Left unchanged because
            # the features must match what the saved model was trained on;
            # confirm against the training code.
            melspect = librosa.feature.melspectrogram(y=signal)
            audio.append(melspect.tolist())
        else:
            mfcc = librosa.feature.mfcc(y=signal, sr=SAMPLE_RATE, n_mfcc=N_MFCC)
            mfcc = mfcc.T
            audio.append(mfcc.tolist())

        # Lightweight progress indicator: report every 25th file per directory.
        if j % 25 == 0:
            print(f"Working file path: {file_path}")

Working file path: ../../data/11chunks/11chunk00000.wav
Working file path: ../../data/11chunks/11chunk00025.wav
Working file path: ../../data/11chunks/11chunk00050.wav
Working file path: ../../data/11chunks/11chunk00075.wav
Working file path: ../../data/11chunks/11chunk00100.wav
Working file path: ../../data/11chunks/11chunk00125.wav
Working file path: ../../data/11chunks/11chunk00150.wav
Working file path: ../../data/11chunks/11chunk00175.wav
Working file path: ../../data/11chunks/11chunk00200.wav
Working file path: ../../data/11chunks/11chunk00225.wav
Working file path: ../../data/11chunks/11chunk00250.wav
Working file path: ../../data/11chunks/11chunk00275.wav
Working file path: ../../data/11chunks/11chunk00300.wav


In [46]:
# Restore the previously trained classifier from its saved location on disk.
model_path = 'whine-cry.model'
model = tf.keras.models.load_model(model_path)

In [48]:
# Stack the per-file feature matrices into one explicit batch array (first
# axis = file) instead of handing Keras a nested Python list. The conversion
# also fails fast, with a clear error, if the matrices do not share a shape.
features = np.asarray(audio, dtype=np.float32)
if features.size == 0:
    raise ValueError("No audio features were extracted; check DATA_PATH.")

# NOTE(review): the output recorded in this notebook shows the same score for
# every chunk. Verify that the feature extraction (sample rate, orientation,
# scaling) matches what the model was trained on.
predictions = model.predict(features)
print(predictions)

[[0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20181392]
 [0.20