### Preprocessing audio

In [1]:
import librosa
import numpy as np
import pandas as pd
import librosa.display
import matplotlib as mpl
import matplotlib.pyplot as plt
import os
from os import listdir
from os.path import isfile, join,split

In [2]:
# Collect the audio clips to convert. The macOS Finder metadata file is filtered
# out instead of being dropped with list.remove(), which raises ValueError when
# '.DS_Store' is not present (e.g. on a clean checkout or a non-macOS machine).
INPUT_AUDIO = [name for name in os.listdir('./samples') if name != '.DS_Store']
INPUT_AUDIO

['Cat3.wav',
 'Birds3.wav',
 'Birds2.wav',
 'Cow1.wav',
 'Cat2.wav',
 'Cow3.wav',
 'Birds1.wav',
 'Cow2.wav',
 'Cat1.wav',
 'Frog1.wav',
 'Frog2.wav',
 'Frog3.wav',
 'Dog1.wav',
 'Dog2.wav',
 'Dog3.wav']

In [3]:
# Directory that receives the rendered spectrogram PNG files.
OUTPUT_IMG = './images/'

In [4]:
class config:
    """Shared audio and mel-spectrogram settings used by the preprocessing cells."""

    # --- Clip geometry -------------------------------------------------------
    sampling_rate = 22050                 # samples per second
    duration = 5                          # clip length; seconds, given samples below
    samples = duration * sampling_rate    # fixed number of samples per clip

    # --- Level threshold -----------------------------------------------------
    top_db = 60                           # dB threshold (presumably for silence trimming; confirm)

    # --- Mel filter bank -----------------------------------------------------
    fmin = 20                             # lowest frequency passed to the mel spectrogram
    fmax = sampling_rate // 2             # highest frequency: half the sampling rate
    n_mels = 96                           # number of mel bands

    # --- STFT framing --------------------------------------------------------
    n_fft = 512                           # FFT window size
    hop_length = 256                      # samples between successive frames

In [5]:
def getUnifiedLength(y, samples=None):
    """Return `y` cut or zero-padded so that it holds exactly `samples` items.

    Parameters
    ----------
    y : array-like
        Input signal. Longer inputs keep only their first `samples` items;
        shorter inputs are padded with zeros at the end.
    samples : int, optional
        Target length. Defaults to ``config.samples`` so existing callers
        that pass only `y` behave exactly as before.

    Returns
    -------
    numpy.ndarray
        The truncated slice of `y`, or a zero-padded copy of it.
    """
    target = config.samples if samples is None else samples
    current = len(y)
    if current > target:
        # Keep the leading part of the signal only.
        return y[:target]
    # current <= target: append (possibly zero) trailing zeros.
    return np.pad(y, (0, target - current), 'constant')

In [6]:
def full_frame(width=None, height=None):
    """Create a new figure whose single, frameless axes fills the whole canvas.

    The new figure and axes become matplotlib's current figure/axes, so
    subsequent pyplot-style drawing calls land on them.

    Parameters
    ----------
    width, height : float, optional
        Figure size passed to ``plt.figure(figsize=...)``. When `width` is
        None the matplotlib default size is used.

    Returns
    -------
    (fig, ax)
        The created figure and axes, so callers can draw on or close exactly
        this figure. Callers that ignore the return value are unaffected.

    Notes
    -----
    Sets ``mpl.rcParams['savefig.pad_inches'] = 0`` globally; the setting
    stays in effect for later saves in the same session.
    """
    mpl.rcParams['savefig.pad_inches'] = 0
    figsize = None if width is None else (width, height)
    fig = plt.figure(figsize=figsize)
    # Axes rectangle [left, bottom, width, height] = whole figure, no frame.
    ax = plt.axes([0, 0, 1, 1], frameon=False)
    # Hide both axis objects so no ticks or labels are drawn.
    ax.get_xaxis().set_visible(False)
    ax.get_yaxis().set_visible(False)
    plt.autoscale(tight=True)
    return fig, ax

In [7]:
# Render every clip listed in INPUT_AUDIO as a borderless log-mel spectrogram PNG.

# plt.savefig cannot write into a directory that does not exist yet.
os.makedirs(OUTPUT_IMG, exist_ok=True)

c = 0  # number of clips actually converted
for f in INPUT_AUDIO:
    file = join('./samples/', f)
    if not isfile(file):
        continue

    c += 1
    # NOTE(review): the clip is resampled to 44100 Hz here, while config.samples
    # and every `sr=` argument below use config.sampling_rate (22050). The
    # mismatch is left untouched because the classifier used later was
    # presumably trained on images produced this way; confirm before changing.
    x, sr = librosa.load(file, sr=44100)
    # config.top_db equals librosa's default threshold; passing it makes the
    # setting explicit and tunable from the config cell.
    x, _ = librosa.effects.trim(x, top_db=config.top_db)
    x = getUnifiedLength(x)
    # The signal is passed as `y=`: recent librosa releases no longer accept
    # it as a positional argument.
    spectrogram = librosa.feature.melspectrogram(y=x,
                                                 sr=config.sampling_rate,
                                                 n_mels=config.n_mels,
                                                 hop_length=config.hop_length,
                                                 n_fft=config.n_fft,
                                                 fmin=config.fmin,
                                                 fmax=config.fmax)
    logmel = librosa.power_to_db(spectrogram, ref=np.max)

    # full_frame() opens the figure that gets saved; no extra plt.figure() is
    # needed (the previous one only created an empty figure per clip).
    full_frame(15, 10)
    librosa.display.specshow(logmel, sr=config.sampling_rate, hop_length=config.hop_length, x_axis='time', y_axis='mel')
    # splitext strips only the final extension, so names with extra dots
    # ("a.b.wav") keep their full stem instead of collapsing to "a".
    filename = os.path.splitext(f)[0] + '.png'
    plt.savefig(join(OUTPUT_IMG, filename))
    # Release the figure before the next clip to keep memory flat.
    plt.close('all')

print(c)

15


### Animal-sVGGNet

In [8]:
import keras
from keras.preprocessing.image import img_to_array
from keras.models import load_model
import numpy as np
import pickle
import cv2
import os

Using TensorFlow backend.


In [9]:
# Load the pickled label object (its `classes_` attribute is used to name the
# predictions) and the trained Keras model.
# NOTE: unpickling can execute arbitrary code; only load pickle files you trust.
# The `with` block closes the file handle, which the previous
# `open(...).read()` one-liner left open.
with open('./AS_VGGN.pickle', "rb") as fh:
    mlb = pickle.load(fh)
model = load_model('./AnimalSounds_SmallerVGGNet.h5')

In [10]:
# Show what is in the image directory. This is the raw listing, so hidden
# files such as .DS_Store appear alongside the PNGs.
os.listdir('./images/')

['Dog3.png',
 '.DS_Store',
 'Dog2.png',
 'Dog1.png',
 'Birds1.png',
 'Cow2.png',
 'Cat1.png',
 'Cow3.png',
 'Birds2.png',
 'Cow1.png',
 'Cat2.png',
 'Cat3.png',
 'Birds3.png',
 'Frog3.png',
 'Frog2.png',
 'Frog1.png']

### Testing

#### Case 1

In [11]:
image_path = './images/Cow3.png'
image = cv2.imread(image_path)
# cv2.imread reports a missing or unreadable file by returning None rather
# than raising, which would otherwise surface as an obscure error in resize.
if image is None:
    raise FileNotFoundError("Could not read image: " + image_path)

# pre-process the image for classification
image = cv2.resize(image, (200, 150))   # (width, height) -> array of 150 rows x 200 columns
image = image.astype("float") / 255.0   # scale pixel values to [0, 1]
image = img_to_array(image)
image = np.expand_dims(image, axis=0)   # add the leading batch axis
image.shape

(1, 150, 200, 3)

In [12]:
# Score the image, then order the class indices from highest to lowest
# score and keep the names of the first three.
proba = model.predict(image)
ranked = np.argsort(-proba, axis=1)
top_3 = mlb.classes_[ranked[:, :3]]
top_3

array([['Cow', 'Dog', 'Chirping birds']], dtype=object)

#### Case 2

In [13]:
image2_path = './images/Birds1.png'
image2 = cv2.imread(image2_path)
# cv2.imread reports a missing or unreadable file by returning None rather
# than raising, which would otherwise surface as an obscure error in resize.
if image2 is None:
    raise FileNotFoundError("Could not read image: " + image2_path)

# pre-process the image for classification
image2 = cv2.resize(image2, (200, 150))   # (width, height) -> array of 150 rows x 200 columns
image2 = image2.astype("float") / 255.0   # scale pixel values to [0, 1]
image2 = img_to_array(image2)
image2 = np.expand_dims(image2, axis=0)   # add the leading batch axis
image2.shape

(1, 150, 200, 3)

In [14]:
# Score the second image, then order the class indices from highest to
# lowest score and keep the names of the first three.
proba2 = model.predict(image2)
ranked2 = np.argsort(-proba2, axis=1)
top_3_2 = mlb.classes_[ranked2[:, :3]]
top_3_2

array([['Chirping birds', 'Frog', 'Rooster']], dtype=object)