In [124]:
import textgrid as tg
import os
import numpy as np
import pandas as pd

from pydub import AudioSegment

import h5py

import math
import datetime

from python_speech_features import mfcc
from python_speech_features import delta
from python_speech_features import logfbank

In [125]:
class FeatureExtractor:
    """Collect per-interval acoustic features from TextGrid-annotated WAV files.

    For every ``<name>.TextGrid`` file that has a sibling ``<name>.wav`` in a
    directory, the intervals of one annotation tier whose mark is listed in
    ``features`` are cut out of the audio and converted to a 2-D feature
    matrix (MFCCs, MFCC deltas and log filterbank energies, concatenated
    column-wise). Results accumulate in ``self.df`` with the columns
    ``label`` (the interval mark) and ``features`` (nested list of float32
    values, one inner list per analysis frame).

    Args:
        features: Collection of interval marks to keep (e.g. ``['H', 'N']``).
        tier_index: Index of the TextGrid tier to read. Defaults to 1, the
            tier the original code referred to as the "humorous tier".
        nfft: Minimum FFT size handed to python_speech_features. Defaults to
            1103, the value previously hard-coded; it is raised automatically
            when the audio's sample rate needs a longer analysis window.
    """

    # Analysis window length in milliseconds. This mirrors the default
    # ``winlen=0.025`` of python_speech_features, which the calls below rely on.
    _WINDOW_MS = 25

    def __init__(self, features, tier_index=1, nfft=1103):
        self.features = features
        self.tier_index = tier_index
        self.nfft = nfft
        # Rows are gathered here and turned into a DataFrame once per
        # extract() call; growing a DataFrame row by row copies it each time.
        self._records = []
        self.df = pd.DataFrame(columns=["label", "features"])

    def __loadWav(self, path):
        """Load the WAV file at ``path`` as a pydub ``AudioSegment``."""
        return AudioSegment.from_file(path, format="wav")

    def _nfftFor(self, frame_rate):
        """Return an FFT size that covers one analysis window at ``frame_rate``.

        The configured ``self.nfft`` is kept whenever it is large enough, so
        results for audio up to 44.1 kHz are unchanged with the default of
        1103. For higher sample rates the window holds more samples than
        that, and a too-small FFT size would make the library truncate every
        frame, so the size is raised to the window length in samples.
        """
        window_samples = math.ceil(frame_rate * self._WINDOW_MS / 1000)
        return max(self.nfft, window_samples)

    def __extractFeatures(self, segment):
        """Compute the feature matrix for one audio segment.

        Args:
            segment: pydub ``AudioSegment`` to analyse.

        Returns:
            2-D numpy array with one row per analysis frame; columns are the
            MFCCs, then their deltas, then the log filterbank energies.
        """
        # get_array_of_samples() interleaves channels, while the feature
        # functions expect a single 1-D signal; downmix so multi-channel
        # files are not analysed as one scrambled stream. Mono is untouched.
        if segment.channels > 1:
            segment = segment.set_channels(1)

        samples = np.array(segment.get_array_of_samples())
        frame_rate = segment.frame_rate
        nfft = self._nfftFor(frame_rate)

        mfcc_feat = mfcc(samples, frame_rate, nfft=nfft)
        d_mfcc_feat = delta(mfcc_feat, 2)
        fbank_feat = logfbank(samples, frame_rate, nfft=nfft)

        return np.concatenate((mfcc_feat, d_mfcc_feat, fbank_feat), axis=1)

    def extract(self, path):
        """Extract features for every annotated WAV file in ``path``.

        TextGrid files without a matching ``.wav`` are skipped, as are
        intervals whose mark is not in ``self.features`` and intervals that
        yield no audio (zero length after millisecond truncation, or lying
        entirely past the end of the recording). Files are visited in sorted
        name order so the row order is reproducible.

        Rows from earlier calls are kept; each call appends to them.

        Args:
            path: Directory containing ``.TextGrid`` / ``.wav`` pairs.

        Returns:
            ``self.df``: a freshly built DataFrame with columns ``label`` and
            ``features`` holding all rows collected so far.
        """
        for file in sorted(os.listdir(path)):
            if not file.endswith(".TextGrid"):
                continue

            wav_path = os.path.join(path, os.path.splitext(file)[0] + '.wav')
            if not os.path.isfile(wav_path):
                continue

            sig = self.__loadWav(wav_path)
            textgrid = tg.TextGrid.fromFile(os.path.join(path, file))

            for interval in textgrid[self.tier_index]:
                if interval.mark not in self.features:
                    continue

                # pydub slices audio by milliseconds; TextGrid times are seconds.
                start = int(interval.minTime * 1000)
                end = int(interval.maxTime * 1000)
                trimmed_sig = sig[start:end]
                if len(trimmed_sig) == 0:
                    # Nothing to analyse; features of an empty signal would
                    # only describe zero padding.
                    continue

                feat = self.__extractFeatures(trimmed_sig).astype(np.float32).tolist()
                self._records.append({"label": interval.mark, "features": feat})

        self.df = pd.DataFrame(self._records, columns=["label", "features"])
        return self.df

    def saveFeatures(self, path, complevel=5):
        """Write ``self.df`` to a timestamped HDF5 file inside ``path``.

        The file is named ``features<YYYY_mm_dd_HH_MM_SS>.h5`` and stores the
        frame under the key ``'df'`` in fixed format with zlib compression.

        NOTE: both columns have object dtype (strings and nested lists), so
        PyTables pickles them and pandas emits a PerformanceWarning. Only
        read such files back from sources you trust.

        Args:
            path: Directory to write the file into.
            complevel: zlib compression level, 0 (off) to 9. Defaults to 5.

        Returns:
            Full path of the file that was written.
        """
        filename = 'features{}.h5'.format(datetime.datetime.now().strftime("%Y_%m_%d_%H_%M_%S"))
        target = os.path.join(path, filename)
        # Compression in to_hdf is selected with complib/complevel; there is no
        # `compression` argument, and `data_columns` only applies to
        # format='table', so neither is passed here.
        self.df.to_hdf(target, key='df', mode='w', format='fixed',
                       complib='zlib', complevel=complevel)
        return target

In [126]:
# Build an extractor that keeps only intervals marked 'H' or 'N'.
extractor = FeatureExtractor(features=['H', 'N'])

# Process every TextGrid/WAV pair under data/; the returned DataFrame is the
# last expression of the cell, so the notebook renders it as the cell output.
extractor.extract(path='data/')

Unnamed: 0,label,features
0,N,"[[9.126991271972656, -11.824827194213867, 5.18..."
1,H,"[[10.176518440246582, -9.401527404785156, -1.1..."
2,N,"[[11.156208992004395, -3.267765998840332, 7.26..."
3,N,"[[12.80535888671875, -13.97295093536377, -13.5..."
4,H,"[[10.152922630310059, -4.378045558929443, 13.3..."
5,N,"[[12.497206687927246, -7.099395751953125, -16...."
6,H,"[[8.958751678466797, -11.638936042785645, 11.4..."
7,N,"[[11.280777931213379, -4.5946455001831055, 1.4..."
8,H,"[[9.217308044433594, -14.45579719543457, 8.805..."
9,H,"[[11.548381805419922, -6.058077812194824, 10.7..."


In [127]:
# Persist the collected label/feature table as a timestamped .h5 file in data/.
extractor.saveFeatures(path='data/')

your performance may suffer as PyTables will pickle object types that it cannot
map directly to c-types [inferred_type->mixed,key->block0_values] [items->['label', 'features']]

  return pytables.to_hdf(path_or_buf, key, self, **kwargs)
