In [2]:
## Sanity check: display the path of the Python interpreter running this kernel
## (handy for confirming the notebook uses the intended conda environment)
import sys
sys.executable

'c:\\python37\\python.exe'

#### Feature extraction

In [5]:
# coding= UTF-8
import glob
import os
import librosa
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import specgram
import soundfile as sf

##Return audio features 
def feature_extraction(file_name):
    """Compute time-averaged spectral descriptors for a single audio file.

    The file is decoded with ``librosa.load`` and every descriptor is averaged
    over its time frames, so each returned array is one-dimensional.

    Parameters
    ----------
    file_name : str
        Path of the audio file to analyse.

    Returns
    -------
    tuple of numpy.ndarray
        ``(mfccs, chroma, mel, contrast, tonnetz)``. ``mfccs`` holds 40 values
        (``n_mfcc=40``); the other lengths follow librosa's defaults.
        NOTE(review): the caller expects the five arrays to total 193 values,
        which matches librosa's defaults (12 chroma, 128 mel, 7 contrast,
        6 tonnetz) -- confirm if any default is overridden.
    """
    # mono=True (librosa's default, made explicit here) always yields a 1-D
    # signal, so no channel selection or transpose is needed afterwards.
    X, sample_rate = librosa.load(file_name, mono=True)

    # Magnitude spectrogram shared by the chroma and spectral-contrast features
    stft = np.abs(librosa.stft(X))

    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40).T, axis=0)
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # The signal must be passed as the keyword ``y``: librosa >= 0.10 rejects
    # positional arguments here.
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    # Tonal centroid features, computed on the harmonic component of the signal
    tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T, axis=0)

    return mfccs, chroma, mel, contrast, tonnetz
    
# Process audio files: Return arrays with features and labels
def parse_audio_files(parent_dir, sub_dirs, file_ext='*.ogg'):
    """Extract features and integer labels for every audio file in each class folder.

    Parameters
    ----------
    parent_dir : str
        Directory that contains one sub-directory per audio class.
    sub_dirs : sequence of str
        Class sub-directory names. The position of a name in this sequence is
        the integer label given to every file found inside it.
    file_ext : str, optional
        Glob pattern used to select audio files (default ``'*.ogg'``).

    Returns
    -------
    features : numpy.ndarray
        float64 array with one row per successfully processed file; shape
        ``(0, 193)`` when no file was processed.
    labels : numpy.ndarray
        Integer array with the class index of each row of ``features``.

    Notes
    -----
    Files whose feature extraction raises are reported on stdout and skipped.
    """
    feature_rows, labels = [], []

    for label, sub_dir in enumerate(sub_dirs):
        pattern = os.path.join(parent_dir, sub_dir, file_ext)
        # glob returns matches in arbitrary order; sort so the row order of
        # the saved arrays is reproducible across runs and platforms.
        for file_name in sorted(glob.glob(pattern)):
            try:
                mfccs, chroma, mel, contrast, tonnetz = feature_extraction(file_name)
            except Exception as e:
                print("[Error] there was an error in feature extraction. %s" % (e))
                continue

            # Concatenate the five descriptors into one flat feature vector
            feature_rows.append(np.hstack([mfccs, chroma, mel, contrast, tonnetz]))
            labels.append(label)
        print("Extracted features from %s, done" % (sub_dir))

    if feature_rows:
        # Stack once at the end instead of re-allocating the matrix per file;
        # cast to float64 to keep the dtype the previous implementation produced.
        features = np.vstack(feature_rows).astype(np.float64)
    else:
        features = np.empty((0, 193))

    # The np.int alias was removed in NumPy 1.24; the builtin int is equivalent.
    return features, np.array(labels, dtype=int)

def one_hot_encode(labels):
    """Convert integer class labels into a one-hot (indicator) matrix.

    Parameters
    ----------
    labels : array-like of int
        Class index of each sample.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_samples, n_classes)`` with a single 1 per
        row, in the column given by that row's label. ``n_classes`` is the
        number of distinct labels, widened to ``max(labels) + 1`` when some
        class index is absent from ``labels`` so the largest label still has
        a column. An empty input yields an array of shape ``(0, 0)``.
    """
    labels = np.asarray(labels)
    n_labels = len(labels)
    if n_labels == 0:
        return np.zeros((0, 0))

    # Counting unique labels alone under-sizes the matrix when a class is
    # missing (e.g. labels [0, 2]); make sure the largest index fits.
    n_classes = max(len(np.unique(labels)), int(labels.max()) + 1)

    encoded = np.zeros((n_labels, n_classes))
    encoded[np.arange(n_labels), labels] = 1
    return encoded

# Every entry of the data folder is treated as one audio class; keep them in
# alphabetical order so a class's position is a stable integer label
audio_directories = sorted(os.listdir("audio-data/"))
print('Audio Classes: ', audio_directories)

Audio Classes:  ['001 - Dog bark', '002 - Rain', '003 - Sea waves', '004 - Baby cry', '005 - Clock tick', '006 - Person sneeze', '007 - Helicopter', '008 - Chainsaw', '009 - Rooster', '010 - Fire crackling']


In [5]:
## Run feature extraction over every class folder, then persist the feature
## matrix and the labels as binary .npy files so later cells can reload them
features, labels = parse_audio_files(parent_dir='audio-data', sub_dirs=audio_directories)
for npy_path, array in (('feat.npy', features), ('label.npy', labels)):
    np.save(npy_path, array)

Extracted features from 001 - Dog bark, done
Extracted features from 002 - Rain, done
Extracted features from 003 - Sea waves, done
Extracted features from 004 - Baby cry, done
Extracted features from 005 - Clock tick, done
Extracted features from 006 - Person sneeze, done
Extracted features from 007 - Helicopter, done
Extracted features from 008 - Chainsaw, done
Extracted features from 009 - Rooster, done
Extracted features from 010 - Fire crackling, done


In [6]:
# Integer-encoded labels written by the extraction cell (10 classes in total)
labels = np.load('label.npy')

# Human-readable class names used to decode the integer labels later:
# the name at position i belongs to integer label i
label_classes = np.array([
    'Dog bark',
    'Rain',
    'Sea waves',
    'Baby cry',
    'Clock tick',
    'Person sneeze',
    'Helicopter',
    'Chainsaw',
    'Rooster',
    'Fire crackling',
])
print(label_classes)

['Dog bark' 'Rain' 'Sea waves' 'Baby cry' 'Clock tick' 'Person sneeze'
 'Helicopter' 'Chainsaw' 'Rooster' 'Fire crackling']


In [7]:
features = np.load('feat.npy')
print(features.shape[0]) # number of audio clips (rows), one feature vector per clip
print(features)

400
[[-6.00969543e+02  4.73483706e+00 -8.54750538e+00 ...  5.66279962e-03
   4.17032259e-04 -7.70993961e-03]
 [-5.86117004e+02  1.02960014e+01 -3.04163551e+00 ... -6.24547078e-03
  -2.03854294e-03  2.32306968e-03]
 [-3.72678284e+02  1.40115265e+02 -2.42036057e+01 ...  2.89431465e-03
  -1.70204823e-03 -4.09344030e-03]
 ...
 [-1.35603989e+02  4.87148743e+01  4.97365036e+01 ...  2.84579661e-02
   1.15127581e-02  2.89216099e-02]
 [-3.63467255e+02  1.07908249e+02  3.86440392e+01 ...  2.82698262e-02
   3.18776160e-03  4.46420821e-03]
 [-2.54639069e+02  1.00809181e+02  4.67884598e+01 ...  2.55107970e-02
   9.83041570e-03  4.23216251e-03]]


##### Data visualization

In [23]:
#Pandas dataframe with 193 features variables for each audio
import pandas as pd
# One row per audio clip, one numbered column per feature, plus the target
# column 'Audio class' holding the class name decoded from the integer label
audio_class = pd.Categorical.from_codes(labels, categories=label_classes)
df = pd.DataFrame(features).assign(**{'Audio class': audio_class})

df

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,184,185,186,187,188,189,190,191,192,Audio class
0,-600.969543,4.734837,-8.547505,-4.236475,-0.859366,-1.933589,-0.334930,0.865300,0.781836,0.480184,...,14.837836,14.807917,15.958999,0.015075,-0.015807,0.019585,0.005663,0.000417,-0.007710,Dog bark
1,-586.117004,10.296001,-3.041636,-2.835895,-0.191074,0.733057,-1.257987,0.130216,-0.124306,0.444428,...,16.910342,16.601088,18.417711,0.007447,0.023205,-0.002062,-0.006245,-0.002039,0.002323,Dog bark
2,-372.678284,140.115265,-24.203606,14.213630,12.299061,15.028257,6.264096,10.966953,5.519964,12.555666,...,16.864909,16.411913,31.218104,-0.000077,0.011256,0.006703,0.002894,-0.001702,-0.004093,Dog bark
3,-379.671661,101.103172,-3.667839,-18.936050,-24.032267,-24.907412,-14.509275,-9.875397,-18.122925,0.413763,...,17.885904,19.224964,32.954129,-0.013027,-0.000719,0.024324,0.050473,-0.012304,-0.006167,Dog bark
4,-330.258545,72.246254,-6.399735,-3.930399,11.812895,2.066310,-8.890626,-6.190706,-8.493846,-4.537625,...,20.504216,17.245558,31.904887,-0.008435,-0.016716,0.005628,0.041912,0.004531,-0.001821,Dog bark
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
395,-339.772339,5.307946,30.715794,11.945235,7.021287,-1.165371,2.045734,-5.427320,-3.173344,-0.711211,...,12.956860,12.982620,28.139996,-0.004697,0.009062,-0.006714,-0.001149,-0.001609,0.002657,Fire crackling
396,-392.526917,-10.900576,27.638479,7.710555,6.249085,-3.998501,1.237561,-6.599030,-4.111058,-1.256793,...,13.191586,12.983568,28.015941,-0.005060,0.010978,-0.001568,0.014283,0.001366,0.002631,Fire crackling
397,-135.603989,48.714874,49.736504,42.313267,33.507626,16.994192,16.213175,11.131563,2.055600,5.481640,...,14.078708,16.722628,31.363194,-0.015671,0.040178,-0.037101,0.028458,0.011513,0.028922,Fire crackling
398,-363.467255,107.908249,38.644039,84.292221,47.163967,31.058775,4.227959,9.912988,-1.614223,5.707627,...,13.876898,15.489605,31.184476,-0.004711,0.001801,-0.014769,0.028270,0.003188,0.004464,Fire crackling


#### SVM Classification 

In [9]:
# coding= UTF-8
import numpy as np
import sklearn
from sklearn.svm import SVC, LinearSVC
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler

from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import reciprocal, uniform

# Load the cached feature matrix and integer class labels
X = np.load('feat.npy')
y = np.load('label.npy').ravel()

# Hold out 40% of the clips as the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)

# Standardised copies of the features. They are computed for experimentation
# only: the classifier below is fitted on the raw, unscaled features.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train.astype(np.float32))
X_test_scaled = scaler.transform(X_test.astype(np.float32))

# Support vector classifier. No kernel is given, so scikit-learn's default
# (RBF) kernel is used -- this is not a linear SVM. C and gamma are tunable.
svm_clf = SVC(C=28.0, gamma=0.00001, decision_function_shape="ovr")

# Fit on the raw training features
svm_clf.fit(X_train, y_train)

# Predicted class index for every held-out clip
y_predict = svm_clf.predict(X_test)

# Mean accuracy on the held-out clips
acc = svm_clf.score(X_test, y_test)
print("accuracy=%0.3f" %acc)

accuracy=0.750


### KNN Classifiers

In [15]:
# Import necessary modules 
from sklearn.neighbors import KNeighborsClassifier 
from sklearn.model_selection import train_test_split 
from sklearn.datasets import load_iris 
  
# Reload the cached feature matrix (X) and the flattened integer labels (y)
X, y = np.load('feat.npy'), np.load('label.npy').ravel()

# 60/40 train/test split of the audio clips, fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.4, random_state=0
)

# k-nearest-neighbours classifier voting over the 7 closest training clips
knn = KNeighborsClassifier(n_neighbors=7).fit(X_train, y_train)

# Show the predicted class indices for the unseen clips, then the mean accuracy
print(knn.predict(X_test))
acc = knn.score(X_test, y_test)
print("accuracy=%0.3f"%acc)

[3 1 8 5 6 6 0 2 3 5 0 3 2 8 5 2 2 3 5 4 3 6 9 8 5 2 1 0 2 2 4 2 3 4 6 1 2
 4 5 9 1 5 0 5 2 1 9 2 3 5 2 9 1 5 7 1 2 0 0 1 1 6 0 8 4 9 3 0 9 3 8 6 2 5
 2 0 9 2 9 8 5 1 1 2 7 5 3 9 9 4 9 2 9 0 5 3 3 9 6 9 2 1 2 4 2 8 2 5 3 2 2
 2 6 5 9 5 4 1 5 8 2 5 5 5 5 3 0 6 5 4 3 9 1 6 4 2 7 5 8 7 4 5 0 2 2 9 5 6
 5 6 8 3 2 2 5 3 6 3 5 2]
accuracy=0.613


## Random Forest

In [21]:

from sklearn.ensemble import RandomForestClassifier #Random Forest classifier
import pandas as pd 
import numpy as np
# Imported here so this cell does not depend on an earlier cell having run
from sklearn.model_selection import train_test_split

np.random.seed(0)

# Load the cached feature matrix and integer class labels
X = np.load('feat.npy')
y = np.load('label.npy').ravel()

# Hold out 30% of the clips as the test set
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=42)

# Random forest with default hyper-parameters, trained on 2 parallel jobs;
# random_state pins the forest so repeated runs give the same model
rf_clf = RandomForestClassifier(n_jobs=2, random_state=0)

# Train model
rf_clf.fit(X_train, y_train)

# Predicted class index for every held-out clip
y_prediction = rf_clf.predict(X_test)

# Mean accuracy on the held-out clips
acc = rf_clf.score(X_test, y_test)
print("Accuracy = %0.3f" %acc)

Accuracy = 0.833


## Naive Bayes

In [22]:
# coding= UTF-8
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
import pandas as pd 

# Reload the cached feature matrix (X) and the flattened integer labels (y)
X, y = np.load('feat.npy'), np.load('label.npy').ravel()

# Keep 33% of the clips aside for testing, fixed seed for repeatability
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=42
)

# Gaussian naive Bayes with default parameters, fitted on the training clips
gnb_clf = GaussianNB().fit(X_train, y_train)

# Predicted class index for every held-out clip
prediction = gnb_clf.predict(X_test)

# Mean accuracy on the held-out clips
acc = gnb_clf.score(X_test, y_test)
print("Accuracy = %0.3f" %acc)

Accuracy = 0.614


## Neural Network

In [None]:
# coding= UTF-8
import numpy as np
import keras
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation
from keras.optimizers import SGD
from sklearn.model_selection import train_test_split

# Prepare the data: cached feature matrix and flattened integer class labels
X = np.load('feat.npy')
y = np.load('label.npy').ravel()

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=0)

# Network dimensions: input width follows the feature matrix instead of a
# hard-coded constant; there is one output unit per audio class.
n_features = X_train.shape[1]
n_classes = 10

# Multi-layer perceptron: two 512-unit ReLU layers, each followed by 50%
# dropout, and a softmax output layer
model = Sequential()
model.add(Dense(512, activation='relu', input_dim=n_features))
model.add(Dropout(0.5))
model.add(Dense(512, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(n_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])

# One-hot encode the labels. They are already 0-based (produced by enumerate
# over the class folders), so they are passed through unchanged: subtracting 1
# would turn label 0 into -1, which wraps to the last column and leaves every
# output unit out of step with its class index.
y_train = keras.utils.to_categorical(y_train, num_classes=n_classes)
y_test = keras.utils.to_categorical(y_test, num_classes=n_classes)

# Train, then report loss and accuracy on the held-out clips (epochs are tunable)
model.fit(X_train, y_train, epochs=100, batch_size=64)
score, acc = model.evaluate(X_test, y_test, batch_size=32)
print('Test score:', score)
print('Test accuracy:', acc)