In [1]:
import glob
import os

import librosa
import numpy as np
import pandas as pd
from scipy.stats import kurtosis, skew
from sklearn.metrics import accuracy_score

In [2]:
# Length of one feature vector: 7 summary statistics (min, max, median, mean,
# variance, skewness, kurtosis) for each of the 20 MFCC coefficients.
featureVectorLength = 7 * 20

In [3]:
def extract_features_from_file(file_name):
    """Summarise the MFCCs of one audio file.

    The file is loaded with ``librosa.load`` and 20 MFCCs are computed, giving a
    matrix with one row per coefficient and one column per frame. Each row is
    then reduced across its frames with seven statistics.

    Parameters
    ----------
    file_name : path of the audio file to load.

    Returns
    -------
    tuple
        Seven arrays, in this order: minimum, maximum, median, mean, variance,
        skewness and kurtosis, each holding one value per MFCC coefficient.
    """
    signal, rate = librosa.load(file_name)
    coefficients = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=20)

    # Order matters: callers unpack / concatenate the results positionally.
    reducers = (np.min, np.max, np.median, np.mean, np.var, skew, kurtosis)

    # axis=1 collapses the frame dimension, leaving one value per coefficient.
    return tuple(reduce_rows(coefficients, axis=1) for reduce_rows in reducers)

In [4]:
def extract_features_from_directories(parent_dir, sub_dirs, file_ext="*.wav"):
    """Build a feature matrix and a label vector from the audio files in several folders.

    Every file matching ``file_ext`` inside ``parent_dir/<sub_dir>`` is passed to
    ``extract_features_from_file``; the arrays it returns are concatenated into a
    single row of the feature matrix. Files that cannot be processed are reported
    (together with the error) and skipped.

    The label of a file is the second dash-separated field of its base name,
    converted to ``int``.

    Parameters
    ----------
    parent_dir : directory that contains the sub-directories.
    sub_dirs : iterable of sub-directory names to scan.
    file_ext : glob pattern selecting the files inside each sub-directory.

    Returns
    -------
    (feature_matrix, labels)
        ``feature_matrix`` is a float array with ``featureVectorLength`` columns
        and one row per successfully processed file; ``labels`` is an integer
        array with one entry per row.

    Raises
    ------
    ValueError
        If a feature vector does not have ``featureVectorLength`` entries, or a
        file name's label field is not an integer.
    IndexError
        If a file's base name contains no ``'-'`` separator.
    """
    feature_rows = []
    labels = []

    for sub_dir in sub_dirs:
        pattern = os.path.join(parent_dir, sub_dir, file_ext)
        # sorted() keeps the row order independent of the file system's listing order
        for fn in sorted(glob.glob(pattern)):
            try:
                file_features = extract_features_from_file(fn)
                print("Finished processing file: ", fn)
            except Exception as e:
                # best effort: report why the file failed and move on to the next one
                print("Error while processing file: ", fn, "-", repr(e))
                continue

            # concatenate the per-statistic arrays into one flat feature vector
            feature_rows.append(np.hstack(file_features))

            # basename() works with either path separator and any directory depth
            labels.append(int(os.path.basename(fn).split('-')[1]))

    # Stack once at the end instead of re-allocating the matrix for every file.
    # The empty (0, featureVectorLength) block fixes the column count, so the
    # result is well-shaped even when no file was processed and a vector of the
    # wrong length still raises ValueError.
    feature_matrix = np.vstack([np.empty((0, featureVectorLength))] + feature_rows)

    # plain ``int``: the ``np.int`` alias no longer exists in current NumPy
    return feature_matrix, np.array(labels, dtype=int)

In [5]:
def extract_features(dirs):
    """Extract features for each fold directory and cache them as pickles.

    For every name ``d`` in ``dirs`` the audio files under
    ``UrbanSound8k/audio/<d>`` are processed with
    ``extract_features_from_directories`` and the results are written to
    ``Features/<d>_features.pkl`` (a DataFrame whose columns are numbered
    ``1..featureVectorLength``) and ``Features/<d>_labels.pkl`` (a Series).

    Parameters
    ----------
    dirs : iterable of fold directory names.
    """
    parent_dir = os.path.join('UrbanSound8k', 'audio')
    output_dir = 'Features'

    # exist_ok=True lets the cell be re-run without failing on an existing folder
    os.makedirs(output_dir, exist_ok=True)

    column_names = list(range(1, featureVectorLength + 1))

    for d in dirs:
        features, labels = extract_features_from_directories(parent_dir, [d])

        features_df = pd.DataFrame(features, columns=column_names)
        labels_series = pd.Series(labels.tolist())

        features_df.to_pickle(os.path.join(output_dir, d + '_features.pkl'))
        labels_series.to_pickle(os.path.join(output_dir, d + '_labels.pkl'))
        

In [None]:
# Names of the ten fold directories, "fold1" through "fold10".
dirs = ["fold" + str(fold_number) for fold_number in range(1, 11)]

# Run the extraction for every fold and cache the results on disk.
extract_features(dirs)

In [None]:
def mean_normalize(featureMatrix):
    """Standardise every column (feature) to zero mean and unit sample std.

    The input is left untouched; a new object of the same kind (NumPy array or
    pandas DataFrame) is returned. Columns whose sample standard deviation is
    exactly zero are only centred, so they come out as zeros instead of NaN/inf.

    Parameters
    ----------
    featureMatrix : 2-D array or DataFrame with one row per sample and one
        column per feature. Integer input is accepted.

    Returns
    -------
    The standardised matrix, ``(featureMatrix - mean) / std`` computed per column.
    """
    mean = np.mean(featureMatrix, axis=0)  # mean of each column (feature)
    std = np.std(featureMatrix, axis=0, ddof=1)  # sample std of each column (feature)

    # Dividing by 0 would poison a constant column with NaN; divide by 1 instead.
    std_values = np.asarray(std, dtype=float)
    safe_std = np.where(std_values == 0, 1.0, std_values)

    # Out-of-place arithmetic: the caller's data is not modified and integer
    # input is promoted to float instead of failing on an in-place cast.
    return (featureMatrix - mean) / safe_std

In [None]:
# Load the cached per-fold features and labels from the 'Features' directory.
# Features are standardised with mean_normalize as they are read.
# NOTE(review): each fold is normalised with its own mean/std rather than with
# statistics shared across folds - confirm this is intended.
features = {}
labels = {}

for d in dirs:
    # os.path.join picks the right separator for the current platform
    features[d] = mean_normalize(pd.read_pickle(os.path.join('Features', d + '_features.pkl')))
    labels[d] = pd.read_pickle(os.path.join('Features', d + '_labels.pkl'))

In [None]:
# Stack the per-fold frames / series in the order given by `dirs`,
# renumbering the index so that it runs continuously over all folds.
sound_data = pd.concat((features[fold] for fold in dirs), ignore_index=True)
sound_labels = pd.concat((labels[fold] for fold in dirs), ignore_index=True)

In [None]:
from sklearn.model_selection import train_test_split, cross_val_score, ShuffleSplit, cross_val_predict
from sklearn import svm
import warnings
# Installs a filter that ignores every warning category from here on.
# NOTE(review): this also hides warnings that may point at real problems;
# consider restricting the filter to specific categories or modules.
warnings.filterwarnings('ignore')

In [None]:
# Hold out 30% of the samples for testing; the fixed seed keeps the split repeatable.
X_train, X_test, Y_train, Y_test = train_test_split(
    sound_data,
    sound_labels.values,
    random_state=0,
    test_size=0.3,
)

# RBF-kernel support vector classifier trained on the remaining 70%.
clf = svm.SVC(kernel='rbf', gamma='auto', C=1)
clf.fit(X_train, Y_train)

In [None]:
#cv = ShuffleSplit(n_splits=5, test_size=0.3, random_state=0)
#clf1 = svm.SVC(C = 1, kernel = 'rbf' , gamma = 'auto')
#scores = cross_val_score(clf1, sound_data, sound_labels.values, cv = 10)
#scores.mean()

In [None]:
# Fresh, unfitted classifier for cross-validation. It is defined in this cell so
# that the cell runs on a clean kernel (no other active cell creates `clf1`).
clf1 = svm.SVC(C=1, kernel='rbf', gamma='auto')

# Out-of-fold predictions for every sample from 10-fold cross-validation.
Y_pred = cross_val_predict(clf1, sound_data, sound_labels.values, cv=10)

In [None]:
# Fraction of samples whose cross-validated prediction matches the true label.
# accuracy_score comes from sklearn.metrics (imported in the first cell).
print("Accuracy : ", accuracy_score(sound_labels.values, Y_pred))