In [4]:
from brian2 import *
from brian2hears import *
import os
import pandas as pd

Cochleagram Preprocessing (Filterbanks Only)

In [None]:
def cgram_preprocess(filepath,cfN = 224, num_frames = 224):
    """Run a sound file through a gammatone filterbank with cube-root compression.

    Parameters
    ----------
    filepath : path of the audio file handed to ``Sound``.
    cfN : number of filterbank channels (centre frequencies) between
        20 Hz and 20 kHz.
    num_frames : accepted for signature parity with the other
        ``*_preprocess`` functions; not used by this function.

    Returns
    -------
    Whatever ``FunctionFilterbank.process()`` returns for the whole sound
    (presumably a samples x ``cfN`` array -- confirm against brian2hears).
    """
    def _rectify_and_compress(x):
        # Half-wave rectify (clip negatives to 0), then cube-root compress.
        return clip(x, 0, Inf) ** (1/3)

    audio = Sound(filepath)
    # Centre frequencies spaced on an ERB scale.
    centre_freqs = erbspace(20*Hz, 20000*Hz, cfN)
    filterbank = Gammatone(audio, centre_freqs)
    return FunctionFilterbank(filterbank, _rectify_and_compress).process()

Linear Gammachirp Preprocessing (Filterbanks Only)

In [None]:
def LinGC_preprocess(filepath,cfN=224,num_frames=224):
    """Run a sound file through a linear gammachirp filterbank with cube-root compression.

    Parameters
    ----------
    filepath : path of the audio file handed to ``Sound``.
    cfN : number of filterbank channels; also the number of time constants
        generated.
    num_frames : accepted for signature parity with the other
        ``*_preprocess`` functions; not used by this function.

    Returns
    -------
    Whatever ``FunctionFilterbank.process()`` returns for the whole sound
    (presumably a samples x ``cfN`` array -- confirm against brian2hears).
    """
    def _rectify_and_compress(x):
        # Half-wave rectify (clip negatives to 0), then cube-root compress.
        return clip(x, 0, Inf) ** (1/3)

    audio = Sound(filepath)
    # Centre frequencies with a spacing following an ERB scale.
    centre_freqs = erbspace(20*Hz, 20000*Hz, cfN)
    # One time constant per channel, going linearly from 3 ms down to 0.3 ms.
    taus = linspace(3, 0.3, cfN)*ms
    glide_slope = 0
    chirp_bank = LinearGammachirp(audio, centre_freqs, taus, glide_slope)
    return FunctionFilterbank(chirp_bank, _rectify_and_compress).process()

Logarithmic Gammachirp Preprocessing (Filterbanks Only)

In [5]:
def LogGC_preprocess(filepath, cfN = 224, num_frames = 224, c1 = -2.96, b1 = 1.81):
    """Run a sound file through a logarithmic gammachirp filterbank with cube-root compression.

    Parameters
    ----------
    filepath : path of the audio file handed to ``Sound``.
    cfN : number of filterbank channels (centre frequencies) between
        20 Hz and 20 kHz.
    num_frames : accepted for signature parity with the other
        ``*_preprocess`` functions; not used by this function.
    c1 : glide slope, forwarded to ``LogGammachirp`` as ``c``.
    b1 : factor determining the time constant of the filters, forwarded to
        ``LogGammachirp`` as ``b``.

    Returns
    -------
    Whatever ``FunctionFilterbank.process()`` returns for the whole sound
    (presumably a samples x ``cfN`` array -- confirm against brian2hears).
    """
    def _rectify_and_compress(x):
        # Half-wave rectify (clip negatives to 0), then cube-root compress.
        return clip(x, 0, Inf) ** (1/3)

    audio = Sound(filepath)
    centre_freqs = erbspace(20*Hz, 20000*Hz, cfN)
    chirp_bank = LogGammachirp(audio, centre_freqs, c=c1, b=b1)
    return FunctionFilterbank(chirp_bank, _rectify_and_compress).process()

Approximate Gammatone Preprocessing (Filterbanks Only)

In [None]:
def approxGT_preprocess(filepath, cfN = 224, num_frames = 224):
    """Run a sound file through an approximate gammatone filterbank with cube-root compression.

    Parameters
    ----------
    filepath : path of the audio file handed to ``Sound``.
    cfN : number of filterbank channels (centre frequencies) between
        20 Hz and 20 kHz.
    num_frames : accepted for signature parity with the other
        ``*_preprocess`` functions; not used by this function.

    Returns
    -------
    Whatever ``FunctionFilterbank.process()`` returns for the whole sound
    (presumably a samples x ``cfN`` array -- confirm against brian2hears).
    """
    def _rectify_and_compress(x):
        # Half-wave rectify (clip negatives to 0), then cube-root compress.
        return clip(x, 0, Inf) ** (1/3)

    audio = Sound(filepath)
    centre_freqs = erbspace(20*Hz, 20000*Hz, cfN)
    # Per-channel bandwidth derived from the centre frequency (in Hz) via a
    # log-log linear relation: log10(bw) = 0.037 + 0.785 * log10(cf).
    bandwidths = 10**(0.037+0.785*log10(centre_freqs/Hz))
    gt_bank = ApproximateGammatone(audio, centre_freqs, bandwidths, order=4)
    return FunctionFilterbank(gt_bank, _rectify_and_compress).process()

Run Preprocessing Scripts on Fold 5

In [6]:
# Dataset metadata and the root directory of the mono audio files.
meta_df = pd.read_csv("data/UrbanSound8K.csv")
file_path = 'data/urbansound8k_mono'

# Class id (as a string) -> human-readable label.
class_map = {
    '0' : 'air_conditioner',
    '1' : 'car_horn',
    '2' : 'children_playing',
    '3' : 'dog_bark',
    '4' : 'drilling',
    '5' : 'engine_idling',
    '6' : 'gun_shot',
    '7' : 'jackhammer',
    '8' : 'siren',
    '9' : 'street_music',
}

# Keep only fold 5, leaving out the recordings (matched by fsID) whose files
# are corrupt / incorrectly PCM encoded (9 files according to the original note).
meta_df = meta_df[(meta_df.fold == 5) & ~meta_df.fsID.isin([19007, 36429, 88466])]

features = []
count_records = len(meta_df.slice_file_name)

# Run the selected filterbank over every remaining clip. Exactly one
# preprocessing call is active; the others are kept as ready-made alternatives.
# NOTE(review): the returned output is discarded and `features` stays empty --
# confirm whether the results were meant to be collected.
for index, row in meta_df.iterrows():
    file_name = os.path.join(
        os.path.abspath(file_path),
        f'fold{row["fold"]!s}/',
        str(row["slice_file_name"]),
    )
    #cgram_preprocess(file_name, cfN = 224, num_frames = 224)
    #approxGT_preprocess(file_name, cfN = 224, num_frames = 224)
    LogGC_preprocess(file_name, cfN = 224, num_frames = 224)
    #LinGC_preprocess(file_name, cfN = 224, num_frames = 224)
    del file_name