In [2]:
#Extracting Features for every audio file

import librosa
import librosa.display
from scipy.io import wavfile as wav
import IPython.display as ipd
import numpy as np
import os
import matplotlib.pyplot as plt
import pandas as pd

# Creating a function that extracts the MFCC features of an audio file
def extract_features(file_name, n_mfcc=40):
    """Return one fixed-length MFCC feature vector for an audio file.

    The file is loaded with librosa, its MFCCs are computed, and each
    coefficient is averaged over all time frames.

    Parameters
    ----------
    file_name : str
        Path of the audio file, passed straight to ``librosa.load``.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute (default 40).

    Returns
    -------
    numpy.ndarray or None
        Array with one mean value per MFCC coefficient, or ``None`` when
        loading or feature extraction raised an exception.
    """
    try:
        # Librosa returns the audio samples and the sampling rate;
        # 'kaiser_fast' trades resampling quality for speed.
        audio, sample_rate = librosa.load(file_name, res_type='kaiser_fast')

        # mfccs has one row per coefficient and one column per time frame
        mfccs = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)

        # Transposing and averaging over axis 0 takes the mean over the time
        # frames, so every clip yields a vector of the same length (n_mfcc)
        # regardless of how many frames it has.
        mfccsscaled = np.mean(mfccs.T, axis=0)

    except Exception as e:
        # Best-effort: report which file failed and why, then let the caller
        # decide what to do with the missing result.
        print("Error encountered while parsing file ", file_name, "-", repr(e))
        return None

    return mfccsscaled





In [3]:
# Locate the full UrbanSound8K dataset relative to the current working directory
cwd = os.getcwd()
categories = ['dog_bark', 'car_horn', 'gun_shot', 'siren']
dataset_dir = os.path.join(cwd, "UrbanSound8K")
metadata = pd.read_csv(os.path.join(dataset_dir, "metadata", "UrbanSound8K.csv"))

features = []
failed_files = []

for index, row in metadata.iterrows():

    # Only the selected categories are processed
    category_str = row["class_name"]
    if category_str not in categories:
        continue

    # Audio files are stored as <dataset>/audio/fold<N>/<slice_file_name>
    file_name = os.path.join(dataset_dir, "audio", "fold" + str(row["fold"]), str(row["slice_file_name"]))

    # Extract features for each wave file
    data = extract_features(file_name)

    # extract_features returns None when a file cannot be parsed; such a row
    # would break the later conversion of the feature column into one array,
    # so it is recorded and left out.
    if data is None:
        failed_files.append(file_name)
        continue

    features.append([data, category_str])

# Convert into a pandas DataFrame
featuresdf = pd.DataFrame(features, columns=['feature', 'class_label'])

print('Finished feature extraction from ', len(featuresdf), 'files')
if failed_files:
    print('Skipped', len(failed_files), 'files that could not be parsed')
display(featuresdf)

Finished feature extraction from  2732 files


Unnamed: 0,feature,class_label
0,"[-215.793, 71.66612, -131.81377, -52.09133, -2...",dog_bark
1,"[-196.18529, 114.94507, -14.661183, 1.2298628,...",car_horn
2,"[-202.99028, 111.43653, -28.663649, 14.733859,...",car_horn
3,"[-206.0117, 90.92227, -25.063507, 29.089552, -...",car_horn
4,"[-204.60738, 103.652916, -29.975658, 27.412031...",car_horn
...,...,...
2727,"[-399.22574, 136.81903, -51.96422, 37.023987, ...",car_horn
2728,"[-346.72726, 87.488495, -46.265022, 52.748856,...",car_horn
2729,"[-304.6132, 112.61994, -47.161945, 37.003487, ...",car_horn
2730,"[-344.7142, 126.758156, -56.17717, 36.070923, ...",car_horn


2732


In [33]:
# Balance the dataframe by under-sampling so that every class_label has the same number of files
# This ensures that no one category has an advantage when the model is being trained

print(featuresdf.class_label.count()) # total number of extracted files

# Create dictionary of dataframes, one per category
frames = {}
categories = ['dog_bark', 'car_horn', 'gun_shot', 'siren']

arr_Size = []

for label in categories:
    frames[label] = featuresdf[featuresdf['class_label'] == label]
    # Number of rows (files) available for this category
    n_rows = len(frames[label])
    arr_Size.append(n_rows)
    print(label, n_rows)

# The smallest category decides how many rows every category keeps
minSize = min(arr_Size)

# Randomly keep minSize rows per category.
# The seed makes the selection reproducible across runs, matching the
# seeded train/test split performed later in the notebook.
for label in frames:
    frames[label] = frames[label].sample(minSize, random_state=42)
    print(frames[label].shape[0])

# Concatenate all dataframes in the dictionary (the keys become an outer index level)
result = pd.concat(frames)

# Keep only the two data columns and replace the index with a plain 0..n-1 range
features_temp = result[["feature", "class_label"]].reset_index(drop=True)
display(features_temp)

2732
dog_bark 1000
car_horn 429
gun_shot 374
siren 929
374
374
374
374


Unnamed: 0,feature,class_label
0,"[-395.19397, 143.10742, 18.418566, 1.2344542, ...",dog_bark
1,"[-409.3987, 85.58972, 18.60112, -1.9504186, -2...",dog_bark
2,"[-208.6443, 85.23191, -41.25806, 15.916695, -3...",dog_bark
3,"[-391.55826, 74.2019, -64.09721, -19.767124, 1...",dog_bark
4,"[-659.502, 58.18304, -9.349158, -9.586478, -7....",dog_bark
...,...,...
1491,"[-332.87567, 149.99313, -36.90327, 33.533695, ...",siren
1492,"[-503.34763, 173.59569, -21.291695, 31.144808,...",siren
1493,"[-420.78137, 144.98412, -45.337044, 14.3555155...",siren
1494,"[-489.23328, 122.28847, -52.489998, 8.501058, ...",siren


In [34]:
# Replace featuresdf with the balanced frame (features_temp) so that the following cells work on the balanced data
featuresdf = features_temp
display(featuresdf)

Unnamed: 0,feature,class_label
0,"[-395.19397, 143.10742, 18.418566, 1.2344542, ...",dog_bark
1,"[-409.3987, 85.58972, 18.60112, -1.9504186, -2...",dog_bark
2,"[-208.6443, 85.23191, -41.25806, 15.916695, -3...",dog_bark
3,"[-391.55826, 74.2019, -64.09721, -19.767124, 1...",dog_bark
4,"[-659.502, 58.18304, -9.349158, -9.586478, -7....",dog_bark
...,...,...
1491,"[-332.87567, 149.99313, -36.90327, 33.533695, ...",siren
1492,"[-503.34763, 173.59569, -21.291695, 31.144808,...",siren
1493,"[-420.78137, 144.98412, -45.337044, 14.3555155...",siren
1494,"[-489.23328, 122.28847, -52.489998, 8.501058, ...",siren


In [51]:
# Use sklearn.preprocessing.LabelEncoder to encode the categorical text data into model-understandable numerical data

from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical




# Stack the per-file feature vectors and their class labels into numpy arrays
X = np.array(featuresdf["feature"].tolist())
y = np.array(featuresdf["class_label"].tolist())

display(y)

# LabelEncoder maps every category name to an integer id
le = LabelEncoder()
class_ids = le.fit_transform(y)

# to_categorical expands the integer ids into a binary (one-hot) matrix
yy = to_categorical(class_ids)

# Show the distinct rows of the encoded label matrix
unique_rows = np.unique(yy, axis=0)
display(unique_rows)

array(['dog_bark', 'dog_bark', 'dog_bark', ..., 'siren', 'siren', 'siren'],
      dtype='<U8')

array([[0., 0., 0., 1.],
       [0., 0., 1., 0.],
       [0., 1., 0., 0.],
       [1., 0., 0., 0.]], dtype=float32)

In [52]:
#print(yy[0])

#for i in yy:
 #   print(i)
  #  if i == [1., 0., 0., 0]
   #     print("woo")
    #Xdog


In [55]:
# Split the Data 
# Hold out 10% of the samples for testing and train on the remaining 90% (test_size=0.1)

from sklearn.model_selection import train_test_split

# X is feature, yy is the one-hot encoded labels
# random_state=42 seeds the shuffling so the same train/test split is produced on every run
# Earlier 80% / 20% variant, kept for reference:
#x_train, x_test, y_train, y_test = train_test_split(X, yy, test_size=0.2, random_state=42)
x_train, x_test, y_train, y_test = train_test_split(X, yy, test_size=0.1, random_state=42)
#print(x_train, x_test, y_train, y_test)


In [56]:
# Persist the train/test arrays, the one-hot label matrix (yy) and the fitted LabelEncoder (le)
# with IPython's %store magic so that the next notebook can restore them (e.g. with `%store -r`)
%store x_train
%store x_test
%store y_test
%store y_train
%store yy
%store le

Stored 'x_train' (ndarray)
Stored 'x_test' (ndarray)
Stored 'y_test' (ndarray)
Stored 'y_train' (ndarray)
Stored 'yy' (ndarray)
Stored 'le' (LabelEncoder)
