# Load Training Data

In [2]:
import os
import pathlib
import pandas as pd

# Root directory of the sample data set. Set the NOISENET_DATA_ROOT environment
# variable to point at your own copy; when it is unset the original default
# location below is used, so existing setups keep working unchanged.
data_root = pathlib.Path(os.environ.get(
    'NOISENET_DATA_ROOT',
    '/Volumes/Clavius/documents/Documents/Employment/NoiseNet/Development/tag_data/sample_set/',
))
manifest_path = data_root / '01_manifest.pkl'

# read the manifest in
# NOTE(review): unpickling can execute arbitrary code -- only load manifests
# that come from a trusted source.
manifest = pd.read_pickle(manifest_path)
manifest.head()

Unnamed: 0,filename,package_hash,manual_tag,tag_set,category_set
0,101729-0-0-1.wav,u8k_fold9,air_conditioner,{air_conditioner},{mechanical}
1,103249-5-0-1.wav,u8k_fold9,engine_idling,{engine_idling},{transport_car}
2,104817-4-0-11.wav,u8k_fold2,drilling,{drilling},{mechanical_construction}
3,104998-7-16-0.wav,u8k_fold5,jackhammer,{jackhammer},{mechanical_construction}
4,104998-7-18-3.wav,u8k_fold5,jackhammer,{jackhammer},{mechanical_construction}


# Category Labels

In [3]:
from sklearn.preprocessing import MultiLabelBinarizer

# Closed set of category labels the model predicts (multi-label).
final_categories = [
    "animal_dogs",
    "animal_insects",
    "animal_birds",
    "animal_cockatoo",
    "animal_poultry",
    "background",
    "human_voice",
    "indeterminate",
    "mechanical",
    "mechanical_construction",
    "mechanical_impulsive",
    "nature_wind",
    "signals_horn",
    "signals_siren",
    "transport_car",
]

# Fit on a single "sample" carrying every label so the encoder learns the full
# label set up front, independent of which labels occur in the manifest.
category_encoder = MultiLabelBinarizer()
category_encoder.fit([final_categories])
category_encoder.classes_

array(['animal_birds', 'animal_cockatoo', 'animal_dogs', 'animal_insects',
       'animal_poultry', 'background', 'human_voice', 'indeterminate',
       'mechanical', 'mechanical_construction', 'mechanical_impulsive',
       'nature_wind', 'signals_horn', 'signals_siren', 'transport_car'],
      dtype=object)

# Preprocessing and Features

In [89]:
# Mel-spectrogram parameters: judged reasonable from a domain perspective, and
# a slightly higher "resolution" than what was used previously.
mel_settings = dict(fmax=8000, power=2, n_mels=128, n_fft=2048, hop_length=512)

# Nominal sampling rate. Most files should be this rate, but if not, they will be resampled
fs_nom = 16000

# Nominal spectrogram shape
shape_nom = (128, 126)

In [92]:
import os
import numpy as np
import soundfile as sf
import librosa
import librosa.display
import sklearn

def force_array_shape(x, force_shape):
    """Force a numpy array to a specific shape by zero-padding or truncating.

    Each axis that is longer than requested is cut down to its leading
    entries; each axis that is shorter is padded with zeros at its end.

    Parameters
    ----------
    x : array-like
        Input array.
    force_shape : sequence of int
        Target shape; must have exactly one entry per dimension of ``x``.

    Returns
    -------
    numpy.ndarray
        A new array of shape ``force_shape``.

    Raises
    ------
    ValueError
        If ``force_shape`` does not have one entry per dimension of ``x``.
    """
    x = np.asarray(x)
    force_shape = tuple(force_shape)

    # A rank mismatch used to slip through: a single pad pair is broadcast by
    # np.pad to every axis, so the result silently had the wrong shape.
    if x.ndim != len(force_shape):
        raise ValueError(
            'force_shape {} must have one entry per array dimension '
            '(array has shape {})'.format(force_shape, x.shape)
        )

    # Truncate: keep only the leading entries along every over-long axis.
    x = x[tuple(slice(0, length) for length in force_shape)]

    # Pad: append zeros at the end of every axis that is still too short.
    pad_widths = [(0, length - current) for length, current in zip(force_shape, x.shape)]
    return np.pad(x, pad_widths, mode='constant')

def get_mels(filepath='', data=[], fs=None, force_shape=None):
    """Compute a mel spectrogram from an audio file or from in-memory samples.

    Parameters
    ----------
    filepath : str or path-like, optional
        Audio file to read with ``librosa.load``. When truthy it takes
        precedence and ``data`` is ignored.
    data : array-like, optional
        Audio samples, used only when ``filepath`` is empty. (The ``[]``
        default is kept for interface compatibility; it is never mutated.)
    fs : number, optional
        With ``filepath``: passed to ``librosa.load`` as ``sr``.
        With ``data``: the sample rate of ``data``; required and must be > 0.
    force_shape : tuple of int, optional
        If given and the spectrogram has a different shape, the spectrogram
        is zero-padded / truncated to this shape with ``force_array_shape``.

    Returns
    -------
    tuple
        ``(S, fs)``: the mel spectrogram computed with ``mel_settings`` and
        the sample rate that was used.

    Raises
    ------
    ValueError
        If neither a file path nor (non-empty data and a positive sample
        rate) is supplied.
    """
    if filepath:
        data, fs = librosa.load(filepath, sr=fs)
        if fs != fs_nom:
            # Report files that were not loaded at the nominal rate.
            print(filepath)
    else:
        # The previous check was `assert len(data>0) and fs>0`: the misplaced
        # parenthesis compared the data itself with 0 (TypeError for the
        # default list), `None > 0` also raised TypeError, and asserts vanish
        # under `python -O`. Validate explicitly instead.
        has_samples = data is not None and np.size(data) > 0
        has_rate = fs is not None and fs > 0
        if not (has_samples and has_rate):
            raise ValueError('Must provide either a filename, or array of data and sample rate')
        data = np.asarray(data)

    S = librosa.feature.melspectrogram(y=data, sr=fs, **mel_settings)

    if force_shape and S.shape != tuple(force_shape):
        S = force_array_shape(S, force_shape)

    return S, fs
 
def load_mels(filepath, force_create=False, save=True):
    """Return the mel spectrogram for an audio file, using an on-disk cache.

    The cache lives next to the audio file, under the same name with a
    ``.npy`` suffix.

    Parameters
    ----------
    filepath : pathlib.Path
        Path of the audio file (must support ``with_suffix``).
    force_create : bool
        When true, ignore any cached file and recompute the spectrogram.
    save : bool
        When true, write a freshly computed spectrogram to the cache file.

    Returns
    -------
    The cached array, or a spectrogram computed by ``get_mels`` at ``fs_nom``
    and forced to ``shape_nom``.
    """
    cache_file = filepath.with_suffix('.npy')

    # Cache hit: hand back the stored array straight away.
    if cache_file.is_file() and not force_create:
        return np.load(cache_file)

    # Cache miss (or forced rebuild): compute from the audio itself.
    spectrogram, _ = get_mels(filepath, fs=fs_nom, force_shape=shape_nom)
    if save:
        np.save(cache_file, spectrogram)

    return spectrogram

def feature_preprocessing(mel):
    """Turn a power mel spectrogram into a standardised dB feature map.

    The spectrogram is converted to decibels relative to its own maximum,
    shifted to zero mean and scaled to unit standard deviation, and a
    trailing axis of length 1 is appended.

    Parameters
    ----------
    mel : numpy.ndarray
        2-D spectrogram (the result is indexed as ``[:, :, None]``).

    Returns
    -------
    numpy.ndarray
        Array of shape ``mel.shape + (1,)``.
    """
    # convert to db and normalise
    power = librosa.core.power_to_db(mel, ref=np.max)
    power = power - np.mean(power)

    # A constant spectrogram (e.g. digital silence) has zero spread; dividing
    # by it would fill the features with NaN. Leave it as all zeros instead.
    spread = np.std(power)
    if spread > 0:
        power = power / spread

    return power[:, :, None]


In [93]:
# Build the feature column in two steps: first resolve every manifest row to
# its audio file, then compute (and cache on disk) the normalised spectrogram.
# All features are held in memory as well as being saved to disk, so this is
# not guaranteed to work for large datasets.
audio_paths = manifest.apply(
    lambda row: data_root / row['package_hash'] / row['filename'],
    axis=1,
)
manifest['features'] = audio_paths.apply(
    lambda path: feature_preprocessing(load_mels(path, force_create=True, save=True))
)

In [7]:
# Shape of the first clip's feature map.
manifest['features'].iloc[0].shape

(128, 126, 1)

# Test/Train Split

NB:
* The category supports (number of samples per category) reflect the rough proportions found in the large data set.
* The classes are heavily imbalanced, so the design will have to be improved by balancing them via oversampling, augmentation, or training weights.

In [94]:
from sklearn.model_selection import train_test_split

# Fixed seed so the train/test partition is identical on every run; without
# it each kernel restart reshuffles the data and results cannot be compared.
SPLIT_SEED = 42

# Stack the per-clip feature maps into one array and binarise the label sets.
X = np.stack(manifest['features'].values)
y = category_encoder.transform(manifest['category_set'].values)
assert len(X) == len(y), 'features and labels must have one row per clip'

# Per-category sample counts, with each category's one-hot encoding.
print('Category Support')
for c, n in zip(category_encoder.classes_, y.sum(axis=0)):
    print('{:30s}{} : {}'.format(c, category_encoder.transform([[c]]), n) )

print()
print(X.shape)
print(y.shape)

X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=SPLIT_SEED
)

print(X_train.shape, X_test.shape)
print(y_train.shape, y_test.shape)



Category Support
animal_birds                  [[1 0 0 0 0 0 0 0 0 0 0 0 0 0 0]] : 2382
animal_cockatoo               [[0 1 0 0 0 0 0 0 0 0 0 0 0 0 0]] : 6
animal_dogs                   [[0 0 1 0 0 0 0 0 0 0 0 0 0 0 0]] : 2229
animal_insects                [[0 0 0 1 0 0 0 0 0 0 0 0 0 0 0]] : 677
animal_poultry                [[0 0 0 0 1 0 0 0 0 0 0 0 0 0 0]] : 12
background                    [[0 0 0 0 0 1 0 0 0 0 0 0 0 0 0]] : 915
human_voice                   [[0 0 0 0 0 0 1 0 0 0 0 0 0 0 0]] : 964
indeterminate                 [[0 0 0 0 0 0 0 1 0 0 0 0 0 0 0]] : 17
mechanical                    [[0 0 0 0 0 0 0 0 1 0 0 0 0 0 0]] : 2099
mechanical_construction       [[0 0 0 0 0 0 0 0 0 1 0 0 0 0 0]] : 35
mechanical_impulsive          [[0 0 0 0 0 0 0 0 0 0 1 0 0 0 0]] : 4
nature_wind                   [[0 0 0 0 0 0 0 0 0 0 0 1 0 0 0]] : 856
signals_horn                  [[0 0 0 0 0 0 0 0 0 0 0 0 1 0 0]] : 7
signals_siren                 [[0 0 0 0 0 0 0 0 0 0 0 0 0 1 0]] : 7
transport_c