# Sound_MFCC.csv Generator

In [1]:
import os
import glob
import librosa
import datetime as d
import pandas as pd
import numpy as np
from scipy.io import wavfile as wav

In [2]:
#Helper functions...

def read_audio(filename):
    """Load an audio file and tag every sample with the second it was recorded in.

    The recording start time is parsed from the file's base name, which must
    consist of exactly nine '_'-separated fields; fields 3 to 8 are read as
    year, month, day, hour, minute and second, the remaining fields are ignored.

    Parameters
    ----------
    filename : str
        Path to the audio file. Forward slashes and backslashes are both
        accepted as directory separators.

    Returns
    -------
    df : pandas.DataFrame
        One row per audio sample with two columns: 'time' (``str`` of the
        ``datetime`` of the second the sample falls in) and 'value' (the
        sample as returned by ``librosa.load``).
    sr : int
        Native sampling rate of the file.

    Raises
    ------
    ValueError
        If the base name does not split into nine '_'-separated fields, or a
        date/time field is not an integer or not a valid date/time.
    """
    # sr=None makes librosa keep the file's native sampling rate, so the file
    # does not have to be read a second time just to discover that rate.
    x, sr = librosa.load(filename, sr=None)
    sr = int(sr)

    # Normalise separators first so the base name is found for both
    # Windows-style and POSIX-style paths.
    basename = filename.replace('\\', '/').rsplit('/', 1)[-1]
    _, _, year, month, day, hour, minute, sec, _ = basename.split('_')
    start_time = d.datetime(int(year), int(month), int(day),
                            int(hour), int(minute), int(sec))

    x_len = x.shape[0]

    # Sample i belongs to second i // sr after the start. Build one label per
    # second and repeat it sr times instead of looping over every sample.
    # dtype=object keeps one shared string per second rather than copying the
    # text for each sample.
    second_labels = np.array(
        [str(start_time + d.timedelta(seconds=k)) for k in range(x_len // sr + 1)],
        dtype=object,
    )
    # The last second may be only partially filled, hence the trim to x_len.
    time_data = np.repeat(second_labels, sr)[:x_len]

    df = pd.DataFrame({'time': time_data, 'value': x})

    return df, sr


def format_date(mmddyyyy):
    """Reorder a 'a/b/c' date string into 'c-a-b'.

    The input must contain exactly three '/'-separated fields (named
    mm/dd/yyyy by the parameter); anything else raises ValueError from the
    unpacking. Fields are copied verbatim.

    NOTE(review): no zero-padding is applied, so the result only sorts
    correctly against 'YYYY-MM-DD' strings if the input is already padded -
    confirm against the Analysis_GPS.csv format.
    """
    month, day, year = mmddyyyy.split('/')
    return f'{year}-{month}-{day}'


def do_mfcc(start_to_end):
    """Summarise the audio inside one time slot as a short list of MFCC means.

    Reads three module-level names that must be set before calling:
    ``df_wav`` (frame with 'time' and 'value' columns, as built by
    ``read_audio``), ``sr`` (its sampling rate) and ``MFCC_COMPONENT``
    (how many values to return).

    Parameters
    ----------
    start_to_end : str
        Slot boundaries written as '<start>-><end>'. Both sides are compared
        as plain strings against ``df_wav.time``, so they must use the same
        text format as that column.

    Returns
    -------
    list
        The per-coefficient mean of the MFCC matrix over all frames, sorted in
        descending order and truncated to ``MFCC_COMPONENT`` entries.
    """
    start,end=start_to_end.split('->')

    # Inclusive on both ends; the comparison is lexicographic on strings.
    slot=df_wav[(df_wav.time>=start) & (df_wav.time<=end)].value.values

    # The signal must be passed as y=...: librosa >= 0.10 made the audio
    # argument keyword-only, so the positional form raises TypeError there.
    mfccs = librosa.feature.mfcc(y=slot, sr=sr)

    # NOTE(review): sorting discards which coefficient each mean came from;
    # kept as-is so the generated Sound_MFCC.csv files stay comparable.
    return sorted(list(mfccs.mean(axis=1)),reverse=True)[:MFCC_COMPONENT]

In [3]:
#Hyper-Parameters...

# Number of MFCC summary values kept per time slot (read by do_mfcc).
MFCC_COMPONENT=5

# One entry per trail folder; sorted because glob order is platform-dependent
# and the progress numbering below should be reproducible.
folders=sorted(glob.glob("./Trails/*/*/"))
num_folders=len(folders)

# Preview on the first folder to sanity-check the pipeline before the batch run.
folder=folders[0]

# read_audio returns (frame, sampling rate); both must be unpacked because
# do_mfcc reads the module-level names df_wav and sr.
df_wav,sr=read_audio(glob.glob(folder+'*.wav')[0])
df_ana_gps=pd.read_csv(glob.glob(folder+'Analysis_GPS.csv')[0])

# Build one '<start date> <start time>-><end date> <end time>' string per row.
df_time_slots=\
pd.DataFrame(df_ana_gps.start_date.apply(format_date)+' '+\
df_ana_gps.start_time+'->'+\
df_ana_gps.end_date.apply(format_date)+' '+\
df_ana_gps.end_time,columns=['time_slots'])

res=df_time_slots.time_slots.apply(do_mfcc)
df_mfcc=pd.DataFrame(list(res),columns=[f'mfcc{i}' for i in range(MFCC_COMPONENT)])
df_mfcc.head()

In [4]:
#Processing....

# Output column names: mfcc0 .. mfcc<MFCC_COMPONENT-1>.
mfcc_columns=[f'mfcc{k}' for k in range(MFCC_COMPONENT)]

for i,folder in enumerate(folders):
    # Folders that already contain Sound_MFCC.csv are reported and skipped.
    if os.path.isfile(folder+'Sound_MFCC.csv'):
        print(f'{i+1}/{num_folders}. folder: {folder} is already processed and Augumented\n')
        continue

    # do_mfcc reads the module-level names df_wav and sr, so they are rebound
    # here for every folder before the slots are evaluated.
    df_wav,sr=read_audio(glob.glob(folder+'*.wav')[0])
    df_ana_gps=pd.read_csv(folder+'Analysis_GPS.csv')

    # One '<start date> <start time>-><end date> <end time>' string per row.
    slot_start=df_ana_gps.start_date.apply(format_date)+' '+df_ana_gps.start_time
    slot_end=df_ana_gps.end_date.apply(format_date)+' '+df_ana_gps.end_time
    df_time_slots=pd.DataFrame(slot_start+'->'+slot_end,columns=['time_slots'])

    res=df_time_slots.time_slots.apply(do_mfcc)

    # One row per time slot, written next to the folder's input files.
    df_mfcc=pd.DataFrame(list(res),columns=mfcc_columns)
    df_mfcc.to_csv(folder+'Sound_MFCC.csv',index=False)

    print(f'{i+1}/{num_folders}. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: {folder}\n')

print('Completed.')

1/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\down\01-07-2019\

2/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\down\01-07-2019_DATA_17_03_44\

3/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\down\02-07-2019\

4/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\down\02-07-2019_DATA_09_52_39\

5/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\down\02-07-2019_DATA_11_11_23\

6/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\down\02-07-2019_DATA_12_06_05\

7/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\down\03-07-2019\

8/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\down\03-07-2019_DATA_09_56_40\

9/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./T

73/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\up\05-09-2019\

74/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\up\06-07-2019\

75/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\up\06-07-2019_DATA_08_40_03\

76/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\up\06-07-2019_DATA_08_57_09\

77/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\up\06-07-2019_DATA_10_07_40\

78/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\up\06-07-2019_DATA_11_00_08\

79/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\up\06-07-2019_DATA_16_58_00\

80/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\up\12-09-2019\

81/102. Processed "Analysis_GPS.csv" and added file "Sound_MFCC.csv" in folder: ./Trails\u

In [5]:
#NICE