In [3]:
import tensorflow as tf
from tensorflow import keras
import tensorflow_addons as tfa
import numpy as np
import pathlib
import pandas as pd


In [13]:
# Relative path to the directory holding the saved models, and the model names to evaluate
model_root_dir = pathlib.Path('../../../models')
model_names = ['SMBCE', 'SMF1', 'LGBCE', 'LGF1', 'BDBCE']

# Category vocabularies considered by the different models
small_cat = [
    'animal_dogs',
    'animal_birds',
    'human_voice',
    'transport_car',
    'mechanical',
    'music',
]
large_cat = [
    'animal_dogs',
    'animal_insects',
    'animal_birds',
    'animal_cockatoo',
    'animal_poultry',
    'background',
    'human_voice',
    'indeterminate',
    'mechanical',
    'mechanical_construction',
    'mechanical_impulsive',
    'mechanical_plant',
    'nature_wind',
    'signals_horn',
    'signals_siren',
    'transport_car',
    'music',
]
binary_dogs = ['animal_dogs']

# Which category vocabulary each model name is evaluated against
model_categories_lookup = {
    'SMBCE': small_cat,
    'SMF1': small_cat,
    'LGBCE': large_cat,
    'LGF1': large_cat,
    'BDBCE': binary_dogs,
}

# Location of the processed tag data.
# NOTE(review): this is a machine-specific absolute path -- set your own path here.
# Alternatively, read in the csv and assign categories yourself (see 01_dataset_curation.ipynb)
data_root = pathlib.Path('/Volumes/Clavius/documents/Documents/Employment/NoiseNet/Development/tag_data/processed')
manifest_path = data_root.joinpath('01_manifest.pkl')

# Load the pickled manifest and preview the first rows.
# NOTE(review): unpickling can execute arbitrary code; only load manifests you trust.
manifest = pd.read_pickle(manifest_path)
manifest.head()

Unnamed: 0,filename,package_hash,manual_tag,tag_set,category_set
0,2019-02-19 19_39_18.wav,635_cnnmodel_20190220_verification_package,ba-do,"{do, ba}","{animal_dogs, background}"
1,2019-02-06 13_14_24.wav,635_cnnmodel_20190220_verification_package,ta-do,"{do, ta}","{animal_dogs, human_voice}"
2,2019-02-19 19_46_53.wav,635_cnnmodel_20190220_verification_package,ba-do,"{do, ba}","{animal_dogs, background}"
3,2019-02-17 19_23_04.wav,635_cnnmodel_20190220_verification_package,cr-do,"{cr, do}","{animal_dogs, animal_insects}"
4,2019-02-19 19_03_00.wav,635_cnnmodel_20190220_verification_package,wi-ba,"{wi, ba}","{nature_wind, background}"


In [16]:
from sklearn.preprocessing import MultiLabelBinarizer

class OneVsOtherBinarizer(object):
    """Fit-for-purpose "one vs others" label binariser.

    Exposes ``fit`` / ``transform`` / ``classes_`` so that it can be used in
    the same way as the other binarisers in this notebook.
    """

    def __init__(self, *args, **kwargs):
        # Arguments are accepted but ignored.
        pass

    def fit(self, the_one):
        """Remember the target category and build the two class names.

        ``classes_`` holds ``'not_' + the_one`` followed by ``the_one``.
        Returns ``self`` so calls can be chained.
        """
        negative_name = 'not_' + the_one
        self.the_one = the_one
        self.the_others = negative_name
        self.classes_ = np.array([negative_name, the_one])
        return self

    def transform(self, data):
        """Return an int32 array with 1 where the fitted category is in the sample.

        Each element of ``data`` is tested with the ``in`` operator, so the
        elements are expected to be containers of category names.
        """
        membership = [self.the_one in labels for labels in data]
        return np.array(membership).astype(np.int32)


def get_category_encoder(categories):
    """Return a fitted label encoder suited to the given category list.

    A single category yields a ``OneVsOtherBinarizer`` fitted on that
    category; anything else yields a ``MultiLabelBinarizer`` fitted on the
    whole list. Quite fit for purpose.
    """
    is_binary_problem = len(categories) == 1
    if not is_binary_problem:
        return MultiLabelBinarizer().fit([categories])
    return OneVsOtherBinarizer().fit(categories[0])

# Data Preparation

In [6]:
# Mel-spectrogram settings: these look pretty good from a domain perspective,
# with a slightly higher "resolution" than was tried previously.
mel_settings = {
    'fmax': 8000,
    'power': 2,
    'n_mels': 128,
    'n_fft': 2048,
    'hop_length': 512,
}

# Nominal sampling rate. Most files should be at this rate; those that are not get resampled.
fs_nom = 16000

# Nominal spectrogram shape
shape_nom = (128, 126)

In [7]:
import os
import numpy as np
import soundfile as sf
import librosa
import librosa.display
import sklearn

def force_array_shape(x, force_shape):
    """Force a numpy array to a specific shape by truncating or zero-filling.

    Each axis listed in ``force_shape`` is cut down to the requested length
    when it is too long, and padded at its end when it is too short.
    """
    end_padding = []
    for axis, wanted in enumerate(force_shape):
        if x.shape[axis] >= wanted:
            # keep only the leading `wanted` entries along this axis
            x = x.take(indices=range(0, wanted), axis=axis)
        shortfall = wanted - x.shape[axis]
        end_padding.append((0, shortfall))
    return np.pad(x, end_padding)

def get_mels(filepath='', data=None, fs=None, force_shape=None):
    """Compute a mel spectrogram from an audio file or from raw samples.

    Parameters
    ----------
    filepath : path-like, optional
        Audio file to load with ``librosa.load``. When given, ``data`` is
        ignored and ``fs`` is passed to librosa as the requested sample rate.
    data : array-like, optional
        Audio samples, used only when ``filepath`` is empty.
    fs : number, optional
        Sample rate. Required (and must be positive) when passing ``data``.
    force_shape : tuple, optional
        If given, the spectrogram is truncated / zero-padded to this shape
        using ``force_array_shape``.

    Returns
    -------
    (S, fs) : the spectrogram and the sample rate that was used.

    Raises
    ------
    ValueError
        If neither a file path nor a non-empty ``data`` with a positive
        ``fs`` is supplied. (The previous check, ``len(data>0)``, had a
        misplaced parenthesis and raised ``TypeError`` for the defaults.)
    """
    if filepath:
        data, fs = librosa.load(filepath, sr=fs)
        if fs != fs_nom:
            # report files that were not loaded at the nominal sample rate
            print(filepath)
    else:
        has_samples = data is not None and len(data) > 0
        has_rate = fs is not None and fs > 0
        if not (has_samples and has_rate):
            raise ValueError('Must provide either a filename, or array of data and sample rate')

    S = librosa.feature.melspectrogram(y=data, sr=fs, **mel_settings)

    # tuple() lets callers pass the target shape as a list as well
    if force_shape and S.shape != tuple(force_shape):
        S = force_array_shape(S, force_shape)

    return S, fs
 
def load_mels(filepath, force_create=False, save=True):
    """Return the mel spectrogram for an audio file, using a ``.npy`` cache.

    The cache file sits next to ``filepath`` with the suffix replaced by
    ``.npy``. If it exists and ``force_create`` is false it is loaded
    directly; otherwise the spectrogram is generated with ``get_mels`` at the
    nominal sample rate and shape, and written to the cache when ``save`` is
    true. ``filepath`` must provide ``with_suffix`` (e.g. a ``pathlib.Path``).
    """
    cache_file = filepath.with_suffix('.npy')

    # Fast path: reuse the cached array unless regeneration was requested.
    if cache_file.is_file() and not force_create:
        return np.load(cache_file)

    spectrogram, _rate = get_mels(filepath, fs=fs_nom, force_shape=shape_nom)
    if save:
        np.save(cache_file, spectrogram)
    return spectrogram

def feature_preprocessing(mel):
    """Convert a power spectrogram to dB and standardise it.

    The dB conversion is referenced to the array maximum; the result then has
    its mean subtracted and is divided by its standard deviation. A trailing
    axis is appended to the two leading axes before returning.

    Note: if the standard deviation is zero the division yields non-finite
    values; nothing here guards against that.
    """
    decibels = librosa.core.power_to_db(mel, ref=np.max)
    centred = decibels - np.mean(decibels)
    standardised = centred / np.std(centred)
    return standardised[:, :, np.newaxis]


# Model Evaluation Utilities

In [137]:
from sklearn.metrics import classification_report
import matplotlib.pyplot as plt
def select_evaluation_folds(df, n_folds=10):
    """Pick a reproducible random selection of packages to use as folds.

    Rows are grouped by the ``package_hash`` column and up to ``n_folds``
    distinct groups are drawn without replacement.

    The draw uses a private ``RandomState(0)``, which gives the same
    selection as seeding the global generator with 0 but leaves numpy's
    global random state untouched. If fewer than ``n_folds`` packages exist,
    all of them are returned instead of raising.

    Returns a list of DataFrames, one per selected package.
    """
    grouped = df.groupby('package_hash')
    package_names = list(grouped.groups.keys())

    # never ask for more folds than there are packages
    n_select = min(n_folds, len(package_names))

    # draw positions rather than names so the group keys keep their original type
    rng = np.random.RandomState(0)
    positions = rng.choice(len(package_names), n_select, replace=False)

    return [grouped.get_group(package_names[i]) for i in positions]

def exact_count(y_true, y_pred):
    """Per-sample exact-match indicator, for computing the exact match ratio.

    Predictions are thresholded at 0.5 and compared element-wise with the
    rounded targets; taking the minimum along axis 1 gives 1.0 for a sample
    only when every entry matches, otherwise 0.0.
    Use ``exact_count(...).numpy().mean()`` to get the ratio.
    """
    targets = tf.round(tf.cast(y_true, tf.float32))
    thresholded = tf.cast(tf.greater_equal(y_pred, 0.5), tf.float32)
    label_hits = tf.cast(tf.equal(thresholded, targets), tf.float32)
    return tf.math.reduce_min(label_hits, axis=1)


def classification_report_to_df(report):
    """Convert a classification-report dictionary into a DataFrame.

    Every top-level key of ``report`` becomes the outer index level. Nested
    dictionaries supply the inner level; scalar entries (such as the
    non-nested ``accuracy`` of a binary classifier) are wrapped as
    ``{key: value}`` so they fit the same two-level layout.

    The wrapping is decided by type rather than truthiness, so an accuracy of
    exactly 0.0 is handled, and the caller's dictionary is not modified.
    """
    nested = {
        key: value if isinstance(value, dict) else {key: value}
        for key, value in report.items()
    }
    frames = {
        key: pd.DataFrame.from_dict(value, 'index') for key, value in nested.items()
    }
    return pd.concat(frames, axis=0)
def evaluate_fold(fold, target_names, model):
    """Predict every sample in a fold and return its metrics as a DataFrame.

    Relies on the module-level ``data_root`` (to locate the audio files) and
    ``category_encoder`` (to encode ``category_set`` into targets).

    Parameters
    ----------
    fold : DataFrame with ``package_hash``, ``filename`` and ``category_set``
        columns. It is not modified.
    target_names : class names passed to ``classification_report``.
    model : object with a ``predict`` method returning scores thresholded at 0.5.

    Returns
    -------
    DataFrame of the classification report plus an ``EMR`` entry, with the
    fold's ``package_hash`` prepended as the outermost index level.

    Raises
    ------
    ValueError
        If no sample is left once rows with NaN features are removed.
    """
    # Remember which package this is before any rows are filtered out.
    package_hash = fold['package_hash'].values[0]

    # Get the audio features. assign() returns a new frame, so the slice
    # handed in by the caller is never written to.
    audio_paths = fold.apply(lambda row: data_root/row['package_hash']/row['filename'], axis=1)
    features = audio_paths.apply(lambda path: feature_preprocessing(load_mels(path, force_create=False, save=True)))
    fold = fold.assign(features=features)

    # Sometimes NaNs leak in from bad source data; remove those samples.
    has_nan = fold['features'].apply(lambda x: np.isnan(x).any())
    fold = fold[~has_nan]
    if fold.empty:
        raise ValueError('No valid samples left in fold {!r} after removing NaN features'.format(package_hash))

    # Prepare for prediction.
    X = np.stack(fold['features'].values)
    y = category_encoder.transform(fold['category_set'].values)
    y_pred = model.predict(X)

    # Passing labels explicitly keeps the report aligned with target_names
    # even when a fold happens to contain only some of the classes.
    labels = None if target_names is None else np.arange(len(target_names))
    cr = classification_report(
        y,
        (y_pred >= 0.5).astype(y.dtype),
        labels=labels,
        target_names=target_names,
        output_dict=True,
    )

    # Hack the EMR into the results structure. Both arrays are reshaped to
    # (n_samples, n_outputs) first: the one-vs-other encoder returns a 1-D
    # target vector, which would otherwise broadcast against a
    # (n_samples, 1) prediction and compare every sample with every other.
    n_samples = len(fold)
    y_2d = np.reshape(y, (n_samples, -1))
    y_pred_2d = np.reshape(y_pred, (n_samples, -1))
    cr['EMR'] = {'EMR': exact_count(y_2d, y_pred_2d).numpy().mean()}

    # Convert to a dataframe and prepend the fold name as the 0th index level.
    cr = classification_report_to_df(cr)
    cr = pd.concat({package_hash: cr}, names=['package_hash'])
    return cr

def evaluate_model(df, model, target_names, k_folds=10):
    """Evaluate a model across ``k_folds`` folds and concatenate the results.

    Folds are chosen with ``select_evaluation_folds`` and each one is scored
    with ``evaluate_fold``; the per-fold DataFrames are concatenated in order.

    The fold count is taken from the ``k_folds`` parameter. (The body used to
    reference an undefined name, ``n_folds``.)
    """
    selections = select_evaluation_folds(df, k_folds)
    results = [evaluate_fold(fold, target_names, model) for fold in selections]
    return pd.concat(results)

def plot_model_evaluation(results, evaluations=(('EMR', 'EMR'), ('macro avg', 'f1-score'), ('animal_dogs', 'f1-score')), *args, **kwargs):
    """Plot the distribution of selected metrics across folds and save it to disk.

    Parameters
    ----------
    results : DataFrame indexed by (package_hash, report entry, metric), as
        produced by ``evaluate_model``.
    evaluations : iterable of ``(entry, metric)`` pairs; one histogram is
        drawn per pair, all into the same axis.
    title : str, keyword only via ``kwargs``
        Used as the figure title and as the file name (``'<title>.png'``).
        Without a title the figure is saved as ``'model_evaluation.png'``
        rather than ``'None.png'``.

    Extra positional arguments are accepted and ignored.
    """
    title = kwargs.get('title') or ''
    filename = '{}.png'.format(title if title else 'model_evaluation')

    fig, ax = plt.subplots(figsize=(12,10), dpi=200)
    try:
        for (lev_1, lev_2) in evaluations:
            # plot the histogram for the particular evaluation into the axis
            results.loc[:,lev_1, lev_2].plot.hist(ax=ax, bins=np.linspace(0,1,30), alpha=0.5)
        # format and save
        ax.set_title(title)
        ax.set_ylabel('Count')
        ax.set_xlabel('Metric score')
        ax.legend([lev_1 if lev_1==lev_2 else f'{lev_1} {lev_2}' for lev_1, lev_2 in evaluations])
        fig.savefig(filename)
    finally:
        # always release the figure, even when plotting or saving fails
        plt.close(fig)

# Evaluate the Models

In [139]:
import warnings

# number of folds to evaluate every model on
k = 50

# A few annoying warnings are spit out for various reasons; turn them off for
# the evaluation only. catch_warnings() puts the previous warning filters back
# on exit, including when a model fails part-way through the loop.
with warnings.catch_warnings():
    warnings.simplefilter("ignore")
    for model_name in model_names:
        print(f'Evaluating {model_name} on k={k} folds')
        model = keras.models.load_model((model_root_dir/model_name).with_suffix('.hdf5'), compile=False)
        # evaluate_fold reads this module-level encoder, so it is rebound for each model
        category_encoder = get_category_encoder(model_categories_lookup[model_name])

        results = evaluate_model(manifest,model,category_encoder.classes_,k)
        plot_model_evaluation(results,evaluations = [('macro avg', 'f1-score'), ('animal_dogs', 'f1-score')], title=f'Distribution of metrics for Model {model_name}: k={k} folds')

Evaluating BDBCE on k=50 folds
