In [2]:
import os
import time
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
import seaborn as sns

from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split

from sklearn.linear_model import SGDClassifier, LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier

from sklearn.metrics import confusion_matrix, classification_report,accuracy_score,average_precision_score,f1_score

import librosa
import IPython.display as ipd
import random

In [3]:
s, sr = librosa.load("../data//fold1/101415-3-0-2.wav")

In [4]:
ipd.Audio("../data/fold1/101415-3-0-2.wav")

In [5]:
df = pd.read_csv("../data/UrbanSound8K.csv")
df.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [6]:
folder_path = "../data"

folds = os.listdir(folder_path)
print(folds)

['fold1', 'fold10', 'fold2', 'fold3', 'fold4', 'fold5', 'fold6', 'fold7', 'fold8', 'fold9', 'UrbanSound8K.csv']


In [7]:
fold_paths = [f"{folder_path}/{i}" for i in folds]

fold_paths

['../data/fold1',
 '../data/fold10',
 '../data/fold2',
 '../data/fold3',
 '../data/fold4',
 '../data/fold5',
 '../data/fold6',
 '../data/fold7',
 '../data/fold8',
 '../data/fold9',
 '../data/UrbanSound8K.csv']

In [8]:
# Shuffling the folder paths inorder to aviod biased loeading of data
random.shuffle(fold_paths)
fold_paths

['../data/fold6',
 '../data/fold4',
 '../data/fold8',
 '../data/fold2',
 '../data/fold3',
 '../data/fold5',
 '../data/fold7',
 '../data/fold9',
 '../data/UrbanSound8K.csv',
 '../data/fold1',
 '../data/fold10']

## Extracting the training data and testing data paths

In [12]:
#Training Data Paths
s = time.time()
train_data_paths = []

for i in range(9):
    
    curr_folder_path = fold_paths[i]
    
    if (curr_folder_path == "../data/UrbanSound8K.csv"):
        continue
    
    curr_files = os.listdir(curr_folder_path)
    
    file_paths = [os.path.join(curr_folder_path, file) for file in curr_files]
    
    for j in range(len(file_paths)):
        curr_file = curr_files[j]
        class_id = df[df['slice_file_name'] == curr_file]
        arr = np.array(class_id['classID'])

        train_data_paths.append([file_paths[j], arr[0]])
        
        
e = time.time()
print((e-s)/60, " mins")
print(len(train_data_paths))
print (train_data_paths[9][0])

ipd.Audio(train_data_paths[9][0])

0.13983993927637736  mins
7022
../data/fold6\104327-2-0-34.wav


In [10]:
s = time.time()

test_data_paths = []
for i in range(9,11):
    
    curr_folder_path = fold_paths[i]
    
    if (curr_folder_path == "../input/urbansound8k/UrbanSound8K.csv"):
        continue
    
    curr_files = os.listdir(curr_folder_path)
    
    file_paths = [os.path.join(curr_folder_path, file) for file in curr_files]
    
    for j in range(len(file_paths)):
        curr_file = curr_files[j]
        class_id = df[df['slice_file_name'] == curr_file]
        arr = np.array(class_id['classID'])

        test_data_paths.append([file_paths[j], arr[0]])
        
      
e = time.time()
print((e-s)/60, " mins")
print(len(test_data_paths))
test_data_paths


0.034167627493540444  mins
1710


[['../data/fold1\\101415-3-0-2.wav', 3],
 ['../data/fold1\\101415-3-0-3.wav', 3],
 ['../data/fold1\\101415-3-0-8.wav', 3],
 ['../data/fold1\\102106-3-0-0.wav', 3],
 ['../data/fold1\\102305-6-0-0.wav', 6],
 ['../data/fold1\\102842-3-0-1.wav', 3],
 ['../data/fold1\\102842-3-1-0.wav', 3],
 ['../data/fold1\\102842-3-1-5.wav', 3],
 ['../data/fold1\\102842-3-1-6.wav', 3],
 ['../data/fold1\\103074-7-0-0.wav', 7],
 ['../data/fold1\\103074-7-0-1.wav', 7],
 ['../data/fold1\\103074-7-0-2.wav', 7],
 ['../data/fold1\\103074-7-1-0.wav', 7],
 ['../data/fold1\\103074-7-1-1.wav', 7],
 ['../data/fold1\\103074-7-1-2.wav', 7],
 ['../data/fold1\\103074-7-1-3.wav', 7],
 ['../data/fold1\\103074-7-1-4.wav', 7],
 ['../data/fold1\\103074-7-1-5.wav', 7],
 ['../data/fold1\\103074-7-1-6.wav', 7],
 ['../data/fold1\\103074-7-2-0.wav', 7],
 ['../data/fold1\\103074-7-3-0.wav', 7],
 ['../data/fold1\\103074-7-3-1.wav', 7],
 ['../data/fold1\\103074-7-3-2.wav', 7],
 ['../data/fold1\\103074-7-3-3.wav', 7],
 ['../data/fold1

In [15]:
from datacollector import DataCollector
metadata_path = "../data/UrbanSound8K.csv"

data_collector = DataCollector(fold_paths, metadata_path)

# Collect training data paths (from folds 0 to 8)
train_data_paths = data_collector.collect_data_paths(0, 9)

# Collect testing data paths (from folds 9 to 10)
test_data_paths = data_collector.collect_data_paths(9, 11)

data_paths = data_collector.collect_data_paths(0, 11)


0.13971411784489948  mins
7022
0.03635719617207845  mins
1710
0.1735539436340332  mins
8732


In [16]:
features1 = {"ae_mean", "ae_var", "rms_mean", "rms_var", "zcr_mean", "zcr_var", "chroma_stft_mean", 
            "chroma_stft_var", "spec_centroid_mean", "spec_centroid_var", "spec_cont_mean", "spec_cont_var",
            "spec_bw_mean", "spec_bw,var","percep_mean", "percep_var", "tempo_mean", "tempo_var", 
            "roll_off_mean", "roll_off_var", "roll_off50_mean","roll_off50_var","roll_off25_mean","roll_off25_var",
            "log_mel_mean", "log_mel_var", "mfcc_mean", "mfcc_var", "spec_mean", "spec_var"}

len(features1)

30

## Extracting features from the Audio data

In [17]:
features_list = {"ae_mean":[], "ae_var":[], "rms_mean":[], "rms_var":[], "zcr_mean":[], "zcr_var":[], "chroma_stft_mean":[], 
            "chroma_stft_var":[], "spec_centroid_mean":[], "spec_centroid_var":[], "spec_cont_mean":[], "spec_cont_var":[],
            "spec_bw_mean":[], "spec_bw_var":[],"percep_mean":[], "percep_var":[], "tempo_mean":[], "tempo_var":[], 
            "roll_off_mean":[], "roll_off_var":[], "roll_off50_mean":[],"roll_off50_var":[],"roll_off25_mean":[],"roll_off25_var":[],
            "log_mel_mean":[], "log_mel_var":[], "mfcc_mean":[], "mfcc_var":[],"spec_mean":[], "spec_var":[], 
            "mag_spec_mean" :[] ,"mag_spec_var":[], "mel_mean":[], "mel_var":[]}

TODO add fuctions to the class

In [19]:

import numpy as np
import librosa
import IPython.display as ipd

FRAME_LENGTH = 2048
HOP_LENGTH = 512
FRAME_SIZE = 2048
HOP_SIZE = 512


def amplitude_envelope(signal, frame_size = 2048, hop_length = 512):
    amplitude_envelope = []
        
    for i in range(0, len(signal), hop_length):
        current_frame_amplitude_envelope = max(signal[i:i+frame_size])
        amplitude_envelope.append(current_frame_amplitude_envelope)
        
    return np.array(amplitude_envelope)

def Rms(song):
    rms_song = librosa.feature.rms(y = song, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)
    return rms_song


def Zcr(song):
    zcr_song = librosa.feature.zero_crossing_rate(y = song, frame_length=FRAME_LENGTH, hop_length=HOP_LENGTH)
    return zcr_song

def Mag_spec(song):
    signal_ft = np.fft.fft(song)
    magnitude_spectrum = np.abs(signal_ft)
    return magnitude_spectrum
    
def spectrogram(song):
    song_stft = librosa.stft(song, n_fft=FRAME_SIZE, hop_length=HOP_SIZE)
    y_song = np.abs(song_stft)**2
    return y_song

def log_spec(song):    
    spec_song = spectrogram(song)
    y_song_log = librosa.power_to_db(spec_song)

def log_mel(song, samp_rate):
    mel_spectrogram = librosa.feature.melspectrogram(y = song, n_fft= 2048, sr = samp_rate, hop_length = 512 ,n_mels=50)
    log_mel_spectrogram = librosa.power_to_db(mel_spectrogram)
    return log_mel_spectrogram

def Mfcc(song, samp_rate, nmfcc = 13):
    mfccs = librosa.feature.mfcc(y = song, n_mfcc= nmfcc, sr = samp_rate)
    return mfccs

def delta_mfcc(song, samp_rate, nmfcc = 13):
    mfccs = Mfcc(song,samp_rate)
    delta_mfcc = librosa.feature.delta(mfccs)
    delta2_mfcc = librosa.feature.delta(mfccs, order=2)
    delta3_mfcc = librosa.feature.delta(mfccs, order=3)
    delta4_mfcc = librosa.feature.delta(mfccs, order=4)
    delta5_mfcc = librosa.feature.delta(mfccs, order=5)
    delta6_mfcc = librosa.feature.delta(mfccs, order=6)

    return (delta_mfcc, delta2_mfcc, delta3_mfcc, delta4_mfcc, delta5_mfcc, delta6_mfcc)

def Chroma_stft(song, samp_rate):  
    c_stft = librosa.feature.chroma_stft(y = song,sr = samp_rate)
    return c_stft

def Spec_centriod(song, samp_rate):
    return librosa.feature.spectral_centroid(y = song,sr = samp_rate)

# spectral rolloff is the frequency below which a specified percentage of the total spectral energy, e.g. 85%, lies.
def spec_roll_off(song, samp_rate):
    return librosa.feature.spectral_rolloff(y = song, sr =samp_rate)

def spec_roll_off50(song, samp_rate):
    return librosa.feature.spectral_rolloff(y = song, sr= samp_rate, roll_percent=0.5)

def spec_roll_off25(song, samp_rate):
    return librosa.feature.spectral_rolloff(y = song,sr = samp_rate, roll_percent=0.25)

def spec_contrast(song, samp_rate):
    return librosa.feature.spectral_contrast(y = song,sr = samp_rate)

def perceptual_wt(song, samp_rate):
    return librosa.perceptual_weighting(S = song)

def tempogram(song, samp_rate):
    return librosa.feature.tempogram(y = song, sr = samp_rate)

def spec_bandwidth(song, samp_rate):
    return librosa.feature.spectral_bandwidth(y = song, sr = samp_rate)

In [20]:
s = time.time()

for i in range(len(data_paths)):
    
    sample , sr = librosa.load(data_paths[i][0])
    
    
    ae = amplitude_envelope(sample, frame_size = 2048, hop_length = 512)
    ae_m, ae_v = ae.mean(), ae.var()
    features_list["ae_mean"].append(ae_m)
    features_list["ae_var"].append(ae_v)
    
    
    rms = Rms(sample)
    rms_m, rms_v = rms.mean(), rms.var()
    features_list["rms_mean"].append(rms_m)
    features_list["rms_var"].append(rms_v)
    
    zcr =  Zcr(sample)
    zcr_m, zcr_v = zcr.mean(), zcr.var()
    features_list["zcr_mean"].append(zcr_m)
    features_list['zcr_var'].append(zcr_v)
    
    
    mag_spec = Mag_spec(sample)
    features_list["mag_spec_mean"].append(mag_spec.mean())
    features_list["mag_spec_var"].append(mag_spec.var())
    
    
    spec = spectrogram(sample)
    features_list["spec_mean"].append(spec.mean())
    features_list['spec_var'].append(spec.var())
    
    
    mel_spec = log_mel(sample, sr)
    features_list["mel_mean"].append(mel_spec.mean())
    features_list["mel_var"].append(mel_spec.var())
    
    
    mfcc = Mfcc(sample, sr)
    features_list["mfcc_mean"].append(mfcc.mean())
    features_list["mfcc_var"].append(mfcc.var())
    
            
    chroma_stft = Chroma_stft(sample, sr)
    features_list["chroma_stft_mean"].append(chroma_stft.mean())
    features_list['chroma_stft_var'].append(chroma_stft.var())
    
    spec_centriod = Spec_centriod(sample, sr)
    features_list['spec_centroid_mean'].append(spec_centriod.mean())
    features_list['spec_centroid_var'].append(spec_centriod.var())
    
    
    spec_roll = spec_roll_off(sample, sr)
    features_list["roll_off_mean"].append(spec_roll.mean())
    features_list['roll_off_var'].append(spec_roll.var())
    
    
    spec_roll50 = spec_roll_off50(sample, sr)
    features_list["roll_off50_mean"].append(spec_roll50.mean())
    features_list['roll_off50_var'].append(spec_roll50.var())
    
    
    spec_roll25 =  spec_roll_off25(sample, sr)
    features_list["roll_off25_mean"].append(spec_roll25.mean())
    features_list['roll_off25_var'].append(spec_roll25.var())
    
    
    spec_contr =  spec_contrast(sample, sr)
    features_list["spec_cont_mean"].append(spec_contr.mean())
    features_list['spec_cont_var'].append(spec_contr.var())
    

#     percep =  perceptual_wt(sample, sr)
#     features_list["percep_mean"].append(percep.mean())
#     features_list['percep_var'].append(percep.var())
    
    
    tempo =  tempogram(sample, sr)
    features_list["tempo_mean"].append(tempo.mean())
    features_list["tempo_var"].append(tempo.var())
    
    
    spec_bw =  spec_bandwidth(sample, sr)
    features_list["spec_bw_mean"].append(spec_bw.mean())
    features_list['spec_bw_var'].append(spec_bw.var())
    
    log_me = log_mel(sample, sr)
    features_list["log_mel_mean"].append(log_me.mean())
    features_list['log_mel_var'].append(log_me.var())
    
#     print(i)

    
e = time.time()
print((e - s)/60  , "mins")

  return pitch_tuning(


28.614593732357026 mins


In [21]:
print(len(features_list))

34


In [22]:
del features_list["percep_mean"]
del features_list["percep_var"]

In [23]:
feature_df = pd.DataFrame(features_list)
feature_df

Unnamed: 0,ae_mean,ae_var,rms_mean,rms_var,zcr_mean,zcr_var,chroma_stft_mean,chroma_stft_var,spec_centroid_mean,spec_centroid_var,...,log_mel_mean,log_mel_var,mfcc_mean,mfcc_var,spec_mean,spec_var,mag_spec_mean,mag_spec_var,mel_mean,mel_var
0,0.149282,0.014686,0.036457,0.000857,0.258637,0.005267,0.559152,0.059094,3574.630967,413446.432939,...,-15.533721,119.464134,-18.431484,3430.470947,1.687729,123.678757,9.960397,95.902847,-15.533721,119.464134
1,0.347395,0.017407,0.100865,0.001697,0.218753,0.004286,0.534395,0.072700,3289.877463,327178.108834,...,-4.988582,66.677063,-11.099107,829.579590,9.126315,6474.970703,19.361811,679.211071,-4.988582,66.677063
2,0.195377,0.016779,0.050473,0.000908,0.226105,0.002822,0.544626,0.072916,3194.741895,223225.384307,...,-11.599207,76.346642,-15.194706,2013.253418,2.667969,703.012329,10.952194,188.635973,-11.599207,76.346642
3,0.203674,0.003955,0.058281,0.000223,0.154624,0.001250,0.625679,0.048116,2945.500877,131307.204614,...,-7.788991,59.251881,-4.159441,1231.435791,2.805299,916.879822,10.660433,208.096372,-7.788991,59.251881
4,0.142380,0.001004,0.042241,0.000080,0.124026,0.000784,0.507904,0.074726,2541.069048,76634.671213,...,-11.885075,78.149147,-6.330313,2223.590820,1.441622,127.031258,6.929858,117.666083,-11.885075,78.149147
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
8727,0.184060,0.016045,0.045473,0.000786,0.402863,0.009161,0.526332,0.070463,4537.477572,286603.811113,...,-11.485449,49.542812,-17.651739,1690.843506,2.191361,289.115906,10.229035,148.404577,-11.485449,49.542812
8728,0.226428,0.009619,0.065369,0.000740,0.364969,0.010937,0.510395,0.063308,4208.264715,389370.298246,...,-8.368240,60.312012,-15.366465,1217.042114,3.864394,582.782593,12.551192,289.233965,-8.368240,60.312012
8729,0.209352,0.006634,0.063438,0.000710,0.292946,0.006543,0.546787,0.056688,3829.861053,266074.687371,...,-8.635166,70.379433,-15.943607,1351.653564,3.635856,482.065735,11.390014,290.117826,-8.635166,70.379433
8730,0.195731,0.010443,0.055393,0.000848,0.297239,0.006732,0.586017,0.054295,3885.937356,191547.448804,...,-9.940153,71.772781,-17.681288,1569.002075,3.018281,350.575043,10.788252,232.454590,-9.940153,71.772781


In [24]:
feature_df.to_csv("../data/output/features.csv", index = False)