# Modules needed
## Preprocessing
 * pandas
 * wavefile
 * LibROSA
 * numba==0.48.0
 
## Machine learning
 * numpy
 * keras
 * sklearn
 * tensorflow
 * tqdm (just for fun)
 
## To enable progress bars
`jupyter nbextension enable --py widgetsnbextension`
`jupyter labextension install @jupyter-widgets/jupyterlab-manager`

In [32]:
import pandas as pd
from os.path import join as join_path
from wavefile import WaveReader

In [33]:
usl = "../resources/UrbanSound8K/"

us_meta = pd.read_csv(usl + 'metadata/UrbanSound8K.csv')

In [34]:
audio_data = []
for i, entry in us_meta.iterrows():
    file_loc = join_path(usl, "audio", 'fold' + str(entry["fold"]), str(entry["slice_file_name"]))
    with WaveReader(file_loc) as r:
        # Probably easier way with this library to read the bit depth.
        audio_data.append((r.channels, r.samplerate, int((r.byterate) / (r.samplerate * r.channels) * 8)))

audio_df = pd.DataFrame(audio_data, columns=['num_channels', 'sample_rate', 'bit_depth'])

# Summaries of Sample Data

In [35]:
print("Number of channels")
print(audio_df.num_channels.value_counts(normalize=True))

print("\nSample Rates")
print(audio_df.sample_rate.value_counts(normalize=True))

print("\nBit Depth")
print(audio_df.bit_depth.value_counts(normalize=True))

Number of channels
2    0.915369
1    0.084631
Name: num_channels, dtype: float64

Sample Rates
44100     0.614979
48000     0.286532
96000     0.069858
24000     0.009391
16000     0.005153
22050     0.005039
11025     0.004466
192000    0.001947
8000      0.001374
11024     0.000802
32000     0.000458
Name: sample_rate, dtype: float64

Bit Depth
16    0.659414
24    0.315277
32    0.019354
8     0.004924
4     0.001031
Name: bit_depth, dtype: float64


# Preprocess files to be similar to the format being used in odas

In [36]:
import librosa
import librosa.display
import tqdm as tqdm
import numpy as np
from multiprocessing import Pool

def extract_features(file_name):
   
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        mfccsscaled = np.mean(mfccs.T,axis=0)
        
    except Exception as e:
        print("Error encountered while parsing file: ", file_name, e)
        return None 
     
    return mfccsscaled

def process_entry(file_entry):
    file_loc = join_path(usl, "audio", 'fold' + str(entry["fold"]), str(entry["slice_file_name"]))
    class_label = entry["class"]
    return [extract_features(file_loc), class_label];

features = []

print("Starting Extraction")
with Pool(12) as p:
    entries = us_meta.iterrows()
    for _ in tqdm.tqdm(p.imap(process_entry, entries), total=us_meta.shape[0]):
        pass
    #r = p.map(process_entry, entries)
    #print(r)

# for index, entry in tqdm.tqdm(us_meta.iterrows(), total=us_meta.shape[0]):
#     file_loc = join_path(usl, "audio", 'fold' + str(entry["fold"]), str(entry["slice_file_name"]))
#     class_label = entry["class"]
#     features.append(extract_features(file_loc))
#     pass

features_df = pd.DataFrame(features, columns=['feature','class_label'])

print('Finished feature extraction from ', len(features_df), ' files')

Starting Extraction
Finished feature extraction from  0  files


100%|██████████| 8732/8732 [00:24<00:00, 350.63it/s]


# Prep for building model

In [37]:
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics 