In [2]:
'''

Author: Madhav Sachdeva
Date: 5 March, 2022
Title: Helper functions for extracting features from audio data

'''

import numpy as np
import pandas as pd
import os
from scipy import stats
import opensmile



SoX could not be found!

    If you do not have SoX, proceed here:
     - - - http://sox.sourceforge.net/ - - -

    If you do (or think that you should) have SoX, double-check your
    path variables.
    


In [3]:
# Global configuration: directory whose entries are listed and passed, one by
# one, to the feature extractor further down. The path is relative, so it is
# resolved against the working directory the notebook is run from.

folder_path = "../data/VAM_Audio/data/speech"

In [4]:
def extract_audio_features(audio_file, smile=None):
    """Extract eGeMAPSv02 functional features for a single audio file.

    Parameters
    ----------
    audio_file : str
        Path of the audio file handed to ``process_file``.
    smile : object, optional
        Pre-configured extractor exposing ``process_file(path)``. When
        omitted, a default ``opensmile.Smile`` (eGeMAPSv02 feature set,
        Functionals level) is built on the first call and reused by later
        calls, so looping over many files does not rebuild it each time.

    Returns
    -------
    The object returned by ``smile.process_file(audio_file)``. With the
    default extractor the rest of this notebook treats it as a DataFrame
    with 88 feature columns.
    """
    if smile is None:
        # Lazily create the default extractor once and remember it on the
        # function object; re-running the defining cell resets the cache.
        smile = getattr(extract_audio_features, "_default_smile", None)
        if smile is None:
            # Latest eGeMAPS version advised; the Functionals level gives
            # the 88 features we want.
            smile = opensmile.Smile(
                feature_set=opensmile.FeatureSet.eGeMAPSv02,
                feature_level=opensmile.FeatureLevel.Functionals,
            )
            extract_audio_features._default_smile = smile

    return smile.process_file(audio_file)


In [5]:
# Sanity check on one recording: run the extractor once and inspect the names
# of the feature columns it produces.
# The path is assembled with os.path.join rather than a literal containing
# "\S": that is an invalid escape sequence in a normal string and hard-codes
# the Windows path separator.
filepath = os.path.join(folder_path, "Satz01000.wav")

# NOTE: despite its name, this variable holds the extracted features, not a path.
audio_filepath = extract_audio_features(filepath)

audio_filepath.columns

Index(['F0semitoneFrom27.5Hz_sma3nz_amean',
       'F0semitoneFrom27.5Hz_sma3nz_stddevNorm',
       'F0semitoneFrom27.5Hz_sma3nz_percentile20.0',
       'F0semitoneFrom27.5Hz_sma3nz_percentile50.0',
       'F0semitoneFrom27.5Hz_sma3nz_percentile80.0',
       'F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2',
       'F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope',
       'F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope',
       'F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope',
       'F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope', 'loudness_sma3_amean',
       'loudness_sma3_stddevNorm', 'loudness_sma3_percentile20.0',
       'loudness_sma3_percentile50.0', 'loudness_sma3_percentile80.0',
       'loudness_sma3_pctlrange0-2', 'loudness_sma3_meanRisingSlope',
       'loudness_sma3_stddevRisingSlope', 'loudness_sma3_meanFallingSlope',
       'loudness_sma3_stddevFallingSlope', 'spectralFlux_sma3_amean',
       'spectralFlux_sma3_stddevNorm', 'mfcc1_sma3_amean',
       'mfcc1_sma3_stddevNorm', 'mfcc2_

In [6]:
# Extract features for every entry in folder_path and stack the per-file
# results into one table.
features = []

# Sort the listing so the row order is reproducible; os.listdir returns
# entries in arbitrary order.
for file_name in sorted(os.listdir(folder_path)):

    audio_file = os.path.join(folder_path, file_name)
    audio_features = extract_audio_features(audio_file)

    # Check if audio features do not have 88 columns
    if audio_features.shape[1] != 88:
        raise ValueError("Audio features not generating 88 length feature columns")

    # Append audio features for all utterances
    features.append(audio_features)

# Fail with a clear message instead of a NameError further down when the
# folder contained nothing to process.
if not features:
    raise ValueError(f"No audio files found in {folder_path!r}")

# Concatenate once, after the loop: rebuilding the full frame on every
# iteration repeats the copy of all previous rows each time.
features_all = pd.concat(features)

features_all



Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,F0semitoneFrom27.5Hz_sma3nz_amean,F0semitoneFrom27.5Hz_sma3nz_stddevNorm,F0semitoneFrom27.5Hz_sma3nz_percentile20.0,F0semitoneFrom27.5Hz_sma3nz_percentile50.0,F0semitoneFrom27.5Hz_sma3nz_percentile80.0,F0semitoneFrom27.5Hz_sma3nz_pctlrange0-2,F0semitoneFrom27.5Hz_sma3nz_meanRisingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevRisingSlope,F0semitoneFrom27.5Hz_sma3nz_meanFallingSlope,F0semitoneFrom27.5Hz_sma3nz_stddevFallingSlope,...,slopeUV0-500_sma3nz_amean,slopeUV500-1500_sma3nz_amean,spectralFluxUV_sma3nz_amean,loudnessPeaksPerSec,VoicedSegmentsPerSec,MeanVoicedSegmentLengthSec,StddevVoicedSegmentLengthSec,MeanUnvoicedSegmentLength,StddevUnvoicedSegmentLength,equivalentSoundLevel_dBp
file,start,end,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1,Unnamed: 22_level_1,Unnamed: 23_level_1
../data/VAM_Audio/data/speech\Satz01000.wav,0 days,0 days 00:00:01.160000,36.392868,0.044989,35.146160,36.659389,37.519577,2.373417,41.449814,34.086391,16.375031,24.223885,...,0.041852,-0.001624,0.047375,4.347826,2.727273,0.250000,0.028284,0.096667,0.026247,-28.172165
../data/VAM_Audio/data/speech\Satz01001.wav,0 days,0 days 00:00:02.680000,41.836590,0.123651,36.729897,43.845772,45.548477,8.818581,123.041252,155.535568,93.948097,125.570007,...,0.067274,-0.000077,0.266241,4.119851,4.198473,0.144545,0.134459,0.078000,0.044677,-22.474688
../data/VAM_Audio/data/speech\Satz01002.wav,0 days,0 days 00:00:00.880000,41.177475,0.109360,36.536976,41.376694,45.767769,9.230793,2.449765,13.696585,41.226830,0.000000,...,0.056440,-0.006060,0.211936,4.597701,2.469136,0.380000,0.310000,0.020000,0.000000,-23.281940
../data/VAM_Audio/data/speech\Satz01003.wav,0 days,0 days 00:00:06.600000,42.175087,0.082657,38.474167,42.464424,45.087883,6.613716,52.681797,24.875019,35.046581,11.206066,...,0.043163,0.003311,0.132167,4.097117,2.752294,0.227778,0.162978,0.128750,0.105468,-24.944466
../data/VAM_Audio/data/speech\Satz01004.wav,0 days,0 days 00:00:01,37.797287,0.050933,36.559074,37.315975,37.930424,1.371349,31.788879,1.799660,24.623688,19.361744,...,0.043830,0.000385,0.038184,4.040404,2.127660,0.355000,0.075000,0.090000,0.040000,-32.811920
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
../data/VAM_Audio/data/speech\Satz47001.wav,0 days,0 days 00:00:02.640000,38.040768,0.056421,36.676113,37.234665,38.894108,2.217995,48.922939,44.467754,16.844824,8.927925,...,0.044435,-0.009180,0.254407,6.844107,4.263566,0.113636,0.065265,0.091667,0.023393,-24.807896
../data/VAM_Audio/data/speech\Satz47002.wav,0 days,0 days 00:00:02.480000,38.078621,0.048065,37.068325,38.208485,38.942944,1.874619,70.530754,49.477333,73.441734,101.314835,...,0.047353,-0.008197,0.175132,4.858300,2.892562,0.168571,0.107229,0.136250,0.120202,-24.144989
../data/VAM_Audio/data/speech\Satz47003.wav,0 days,0 days 00:00:01.480000,36.652027,0.095561,35.036156,38.011032,38.583492,3.547337,142.467529,78.294075,28.453882,5.358686,...,0.051549,-0.010839,0.199869,6.122449,4.225352,0.121667,0.074926,0.080000,0.028785,-25.068474
../data/VAM_Audio/data/speech\Satz47004.wav,0 days,0 days 00:00:03.560000,37.676373,0.053870,35.586765,37.507458,39.633289,4.046524,25.568827,10.601388,22.620510,13.198905,...,0.047889,-0.012582,0.183101,4.507042,2.873563,0.221000,0.141099,0.107000,0.100305,-23.219284


In [7]:
# Persist the combined feature table as CSV in the current working directory,
# writing the index alongside the feature columns.
features_all.to_csv(
    path_or_buf='audio_features.csv',
    index=True,
)