# Modules needed
## Preprocessing
 * pandas
 * wavefile
 * matplotlib
 * LibROSA
 * numba==0.48.0
## Machine learning
 * numpy
 * keras
 * sklearn
 * tensorflow
 * tqdm (just for fun)

If you are using the docker container then you should just have to do this:
`pip install pandas wavefile matplotlib librosa numba==0.48.0 numpy keras scikit-learn tqdm`
 
## To enable progress bars
`jupyter nbextension enable --py widgetsnbextension`

In [1]:
#!pip freeze > requirements.txt

In [2]:
import pandas as pd
import os
from os.path import join as join_path
from wavefile import WaveReader

### Performance Tweaking
How many worker processes to use for the parallel (multiprocessing) steps.
Some of the processing takes a very long time when run single-threaded, which is the default.

In [3]:
# Size of the worker pool used by the parallel feature-extraction cells.
max_threads = os.cpu_count()

print(f"Threads: {max_threads}")

Threads: 24


In [4]:
# Root folder of the UrbanSound8K dataset. The trailing slash matters because
# the metadata path below is built by plain string concatenation.
usl = "../resources/UrbanSound8K/"

# Metadata table; later cells read each row's fold, file name and class from it.
metadata_csv = f"{usl}metadata/UrbanSound8K.csv"
us_meta = pd.read_csv(metadata_csv)

In [5]:
# tqdm is imported as a module (and called as tqdm.tqdm) to match the later
# cells. Binding the bare name `tqdm` to the progress-bar class here would make
# every `tqdm.tqdm(...)` call elsewhere depend on cell execution order.
import tqdm
import pandas as pd

# One (channels, sample rate, bit depth) tuple per clip listed in the metadata.
audio_data = []

for i, entry in tqdm.tqdm(us_meta.iterrows(), total=us_meta.shape[0], desc="Processing audio files"):
    file_loc = join_path(usl, "audio", 'fold' + str(entry["fold"]), str(entry["slice_file_name"]))
    with WaveReader(file_loc) as r:
        # Bits per sample derived from the header fields.
        # NOTE(review): assumes `byterate` is bytes per second — confirm against wavefile docs.
        bit_depth = int((r.byterate) / (r.samplerate * r.channels) * 8)
        audio_data.append((r.channels, r.samplerate, bit_depth))

audio_df = pd.DataFrame(audio_data, columns=['num_channels', 'sample_rate', 'bit_depth'])


Processing audio files: 100% 8732/8732 [00:43<00:00, 198.55it/s]


### Summaries of Sample Data

In [6]:
# Heading text (printed verbatim) paired with the audio_df column it summarises.
summary_columns = [
    ("Number of channels", "num_channels"),
    ("\nSample Rates", "sample_rate"),
    ("\nBit Depth", "bit_depth"),
]

for heading, column in summary_columns:
    print(heading)
    # Relative frequency of each distinct value in the column.
    print(audio_df[column].value_counts(normalize=True))

Number of channels
num_channels
2    0.915369
1    0.084631
Name: proportion, dtype: float64

Sample Rates
sample_rate
44100     0.614979
48000     0.286532
96000     0.069858
24000     0.009391
16000     0.005153
22050     0.005039
11025     0.004466
192000    0.001947
8000      0.001374
11024     0.000802
32000     0.000458
Name: proportion, dtype: float64

Bit Depth
bit_depth
16    0.659414
24    0.315277
32    0.019354
8     0.004924
4     0.001031
Name: proportion, dtype: float64


### Preprocess files to be similar to the format being used in odas

In [7]:
import librosa
import librosa.display
#import tqdm.notebook as tqdm
import tqdm
import numpy as np
from multiprocessing import Pool

# Number of MFCC frames (columns) every feature matrix is padded out to.
# NOTE(review): 174 presumably covers the longest clip in the dataset at
# librosa's default load settings — confirm before changing.
max_pad_len = 174

def extract_features(file_name):
    """Return a fixed-width MFCC matrix for one audio file, cached on disk.

    The computed matrix is saved next to the audio file as ``<file_name>.npy``
    and returned as-is on later calls. The cache is keyed by path only, so it
    must be deleted by hand if the extraction settings (e.g. ``max_pad_len``)
    change.

    Parameters
    ----------
    file_name : str
        Path of the audio file to load.

    Returns
    -------
    numpy.ndarray or None
        A freshly computed matrix has 40 rows and exactly ``max_pad_len``
        columns: shorter clips are zero-padded on the right, longer clips are
        truncated. ``None`` is returned (after printing the error) when the
        file cannot be loaded or processed.
    """
    cache_file = file_name + ".npy"

    # Load from processed file
    if os.path.isfile(cache_file):
        return np.load(cache_file)

    try:
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
        # Truncate first: a clip longer than max_pad_len frames would otherwise
        # give a negative pad width, make np.pad raise, and be dropped as None.
        mfccs = mfccs[:, :max_pad_len]
        pad_width = max_pad_len - mfccs.shape[1]
        mfccs = np.pad(mfccs, pad_width=((0, 0), (0, pad_width)), mode='constant')

    except Exception as e:
        # Best effort: report the failing file and let the caller filter out None.
        print("Error encountered while parsing file: ", file_name, e)
        return None

    np.save(cache_file, mfccs)

    return mfccs

def process_entry(file_entry):
    """Build the ``[features, class_label]`` pair for one metadata row.

    ``file_entry`` is an ``(index, row)`` pair as produced by
    ``DataFrame.iterrows()``; the index is ignored. The row's ``fold`` and
    ``slice_file_name`` fields locate the clip under ``usl``/audio, and its
    ``class`` field is returned unchanged as the label.
    """
    _index, row = file_entry
    fold_dir = 'fold' + str(row["fold"])
    clip_path = join_path(usl, "audio", fold_dir, str(row["slice_file_name"]))
    label = row["class"]
    return [extract_features(clip_path), label]



In [8]:
# [features, class_label] pairs for every usable UrbanSound8K clip.
urban_sound_features = []
# Clips whose feature extraction returned None; they are skipped, not stored.
failed_extractions = 0

print("Extracting features: UrbanSound8K")
with Pool(max_threads) as p:
    entries = us_meta.iterrows()
    # imap yields results in metadata order, so row order stays deterministic.
    for value in tqdm.tqdm(p.imap(process_entry, entries), total=us_meta.shape[0]):
        mfccs, class_label = value
        # gun_shot clips are left out; the dog_bark exclusion is currently disabled.
        if class_label == "gun_shot":
            continue
        # A None feature would make the later np.array(...) conversion ragged.
        if mfccs is None:
            failed_extractions += 1
            continue
        urban_sound_features.append(value)

if failed_extractions:
    print("Skipped", failed_extractions, "files that failed feature extraction")

Extracting features: UrbanSound8K


100% 8732/8732 [00:08<00:00, 975.67it/s] 


In [11]:
from os import listdir
from os.path import isfile, join, isdir

# (features, class_label) tuples for every usable custom recording.
custom_sound_features = []
custom_sound_loc = "../resources/CustomSounds"
# Each sub-folder is one class. Sorted because os.listdir order is arbitrary,
# and row order affects the seeded train/test split later on.
sound_class_folders = sorted(f for f in listdir(custom_sound_loc) if isdir(join(custom_sound_loc, f)))
print("Extracting features: CustomSounds")
# Not used in this cell; kept in case another cell references it.
custom_sounds = []
with Pool(max_threads) as p:
    for class_name in sound_class_folders:
        print("Extracting:", class_name)
        class_dir = join(custom_sound_loc, class_name)
        # Skip cached feature files and macOS folder metadata.
        sound_files = sorted(
            f for f in listdir(class_dir)
            if isfile(join(class_dir, f))
            and not f.endswith(('.npy', '.DS_Store'))
        )
        if not sound_files:
            print("No sounds found for:", class_name)
            continue
        file_locations = [join(class_dir, f) for f in sound_files]
        for value in tqdm.tqdm(p.imap(extract_features, file_locations), total=len(file_locations)):
            # extract_features returns None on failure; storing it would make
            # the later np.array(...) conversion ragged.
            if value is None:
                continue
            custom_sound_features.append((value, class_name))
    
    

Extracting features: CustomSounds
Extracting: KnockDetected


100% 55/55 [00:00<00:00, 1116.52it/s]


Extracting: NoKnock


100% 191/191 [00:00<00:00, 1162.85it/s]


In [12]:
# Combine both sources into one table with one row per clip.
features = [*urban_sound_features, *custom_sound_features]
features_df = pd.DataFrame(features, columns=['feature','class_label'])
print('Finished feature extraction from ', len(features_df), ' files')

# Share of each class label in the combined data set.
print("Label Distribution")
label_share = features_df["class_label"].value_counts(normalize=True)
print(label_share)

Finished feature extraction from  8604  files
Label Distribution
class_label
dog_bark            0.116225
children_playing    0.116225
air_conditioner     0.116225
street_music        0.116225
engine_idling       0.116225
jackhammer          0.116225
drilling            0.116225
siren               0.107973
car_horn            0.049861
NoKnock             0.022199
KnockDetected       0.006392
Name: proportion, dtype: float64


### Prep learning and training dataset
TODO: The UrbanSound8K documentation advises against reshuffling the data and recommends evaluating with its 10 predefined folds, so the random split used below should be revisited at some point.
For now, though, let's get to training! :D

In [13]:
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.utils import to_categorical

# Stack the per-clip feature matrices and their labels into numpy arrays
X = np.array(features_df["feature"].tolist())
y = np.array(features_df["class_label"].tolist())

# Map class names to integer ids, then one-hot encode those ids
le = LabelEncoder()
class_ids = le.fit_transform(y)
yy = to_categorical(class_ids)

# Hold out 20% of the rows for testing (fixed seed keeps the split repeatable)
from sklearn.model_selection import train_test_split

x_train, x_test, y_train, y_test = train_test_split(
    X,
    yy,
    test_size=0.2,
    random_state=42,
)

2024-08-19 21:33:01.842393: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


### Store pre-processed data

In [14]:
### Save values
# Persist the split arrays, the full arrays and the fitted label encoder with
# IPython's %store magic so another notebook/kernel can restore them with
# `%store -r`. Comments stay on their own lines: text after a variable name
# would be parsed as an argument to %store.

%store x_train 
%store x_test 
%store y_train 
%store y_test 
%store yy 
%store le
%store X
%store y


Stored 'x_train' (ndarray)
Stored 'x_test' (ndarray)
Stored 'y_train' (ndarray)
Stored 'y_test' (ndarray)
Stored 'yy' (ndarray)
Stored 'le' (LabelEncoder)
Stored 'X' (ndarray)
Stored 'y' (ndarray)
