# Modules needed
## Preprocessing
 * install pandas
 * wavefile
 * matplotplib
 * LibROSA
 * numba==0.48.0
## Machine learning
 * numpy
 * keras
 * sklearn
 * tensorflow
 * tqdm (just for fun)

If you are using anaconda tensorflow should already be setup
`pip install pandas wavefile matplotlib librosa numba==0.48.0 numpy keras sklearn tqdm keras tensorflow-gpu`
 
## To enable progress bars
`jupyter nbextension enable --py widgetsnbextension`
`jupyter labextension install @jupyter-widgets/jupyterlab-manager`

In [1]:
import pandas as pd
from os.path import join as join_path
from wavefile import WaveReader

# Performance Tweaking
How many threads to use for multithreading and such.
Some of the processing takes forever using default single threaded capabilities

In [2]:
max_threads = 24

In [3]:
usl = "../resources/UrbanSound8K/"

us_meta = pd.read_csv(usl + 'metadata/UrbanSound8K.csv')

In [4]:
audio_data = []
for i, entry in us_meta.iterrows():
    file_loc = join_path(usl, "audio", 'fold' + str(entry["fold"]), str(entry["slice_file_name"]))
    with WaveReader(file_loc) as r:
        # Probably easier way with this library to read the bit depth.
        audio_data.append((r.channels, r.samplerate, int((r.byterate) / (r.samplerate * r.channels) * 8)))

audio_df = pd.DataFrame(audio_data, columns=['num_channels', 'sample_rate', 'bit_depth'])

# Summaries of Sample Data

In [5]:
print("Number of channels")
print(audio_df.num_channels.value_counts(normalize=True))

print("\nSample Rates")
print(audio_df.sample_rate.value_counts(normalize=True))

print("\nBit Depth")
print(audio_df.bit_depth.value_counts(normalize=True))

Number of channels
2    0.915369
1    0.084631
Name: num_channels, dtype: float64

Sample Rates
44100     0.614979
48000     0.286532
96000     0.069858
24000     0.009391
16000     0.005153
22050     0.005039
11025     0.004466
192000    0.001947
8000      0.001374
11024     0.000802
32000     0.000458
Name: sample_rate, dtype: float64

Bit Depth
16    0.659414
24    0.315277
32    0.019354
8     0.004924
4     0.001031
Name: bit_depth, dtype: float64


# Preprocess files to be similar to the format being used in odas

In [20]:
import librosa
import librosa.display
import tqdm as tqdm
import numpy as np
from multiprocessing import Pool

def extract_features(file_name):
   
    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast') 
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        mfccsscaled = np.mean(mfccs.T,axis=0)
        
    except Exception as e:
        print("Error encountered while parsing file: ", file_name, e)
        return None 
     
    return mfccsscaled

def process_entry(file_entry):
    file_loc = join_path(usl, "audio", 'fold' + str(entry["fold"]), str(entry["slice_file_name"]))
    class_label = entry["class"]
    return [extract_features(file_loc), class_label];

features = []

print("Starting Extraction")
with Pool(max_threads) as p:
    entries = us_meta.iterrows()
    for value in tqdm.tqdm(p.imap_unordered(process_entry, entries), total=us_meta.shape[0]):
        print(value)
        features.append(value)
        pass
    

features_df = pd.DataFrame(features, columns=['feature','class_label'])

print('Finished feature extraction from ', len(features_df), ' files')

Starting Extraction
[array([-3.1593384e+02,  9.5675888e+01, -3.8047771e+01,  4.7500740e+01,
       -7.3586011e+00,  1.0455470e+01, -1.8447603e+00,  1.8642927e+01,
       -1.1789134e+01,  7.2919984e+00, -7.3276439e+00,  1.0661768e+01,
       -1.4730461e+01,  2.3039916e+00, -1.0813203e+01,  5.0486927e+00,
       -4.1896577e+00,  1.9373505e+01,  9.9117869e-01,  4.5830026e+00,
       -1.0203203e+01,  1.4952717e+00, -8.8648930e+00,  7.1252317e+00,
        2.8728652e+00,  1.5023617e+00, -1.3836695e+01,  3.0785388e-01,
        4.4619193e+00,  2.4852614e+00, -1.1666129e+01, -3.5005753e+00,
        4.0923743e+00,  8.7135019e+00,  1.1072226e+00,  3.0423381e+00,
        2.5794458e+00,  5.7758051e-01, -1.1463765e+01, -5.9653535e+00],
      dtype=float32), 'car_horn']
[array([-3.1593384e+02,  9.5675888e+01, -3.8047771e+01,  4.7500740e+01,
       -7.3586011e+00,  1.0455470e+01, -1.8447603e+00,  1.8642927e+01,
       -1.1789134e+01,  7.2919984e+00, -7.3276439e+00,  1.0661768e+01,
       -1.4730461e+0

  4%|▍         | 357/8732 [00:03<01:32, 90.61it/s] Process ForkPoolWorker-42:
Process ForkPoolWorker-31:
Process ForkPoolWorker-30:
Process ForkPoolWorker-43:
Process ForkPoolWorker-34:
Process ForkPoolWorker-40:
Process ForkPoolWorker-32:
Process ForkPoolWorker-38:
Process ForkPoolWorker-35:
Traceback (most recent call last):
  File "/usr/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/usr/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/lib/python3.7/multiprocessing/pool.py", line 121, in worker
    result = (True, func(*args, **kwds))
Traceback (most recent call last):
  File "<ipython-input-20-5db7306a510b>", line 23, in process_entry
    return [extract_features(file_loc), class_label];
  File "/usr/lib/python3.7/multiprocessing/process.py", line 297, in _bootstrap
    self.run()
  File "/usr/lib/python3.7/multiprocessing/process.py", line 99, in run
    self._target(*se

KeyboardInterrupt: 

# Prep learning and training dataset
Will need to check at some point the 8k datasets because it does say something about don't randomise it or something.
Though for now lets get to training! :D

In [18]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

# Convert features and corresponding classification labels into numpy arrays
X = np.array(features_df.feature.tolist())
y = np.array(features_df.class_label.tolist())

# Encode the classification labels
le = LabelEncoder()
yy = to_categorical(le.fit_transform(y)) 

# split the dataset 
from sklearn.model_selection import train_test_split 

x_train, x_test, y_train, y_test = train_test_split(X, yy, test_size=0.2, random_state = 42)

In [None]:
### store the preprocessed data for use in other notebooks

%store x_train 
%store x_test 
%store y_train 
%store y_test 
%store yy 
%store le