In [1]:
import textgrid as tg
import os
import numpy as np
import pandas as pd

from pydub import AudioSegment

import librosa
import librosa.display

import h5py

import sklearn

import math
import datetime

import soundfile as sf

import matplotlib.pyplot as plt
import IPython.display
plt.rcParams['figure.figsize'] = (14,4)

  from ._conv import register_converters as _register_converters


In [1]:
class FeatureExtractor:
    def __init__(self, marks):
        self.marks = marks
        self.features = np.empty((0,227))
        self.labels = np.empty(0)
        self.df = pd.DataFrame(columns=["label", "features"])
    
    def __loadWav(self, path):
#         sig = AudioSegment.from_file(path, format="wav")
        X, sample_rate = sf.read(path, dtype='float32')

        return X, sample_rate
    
    def __extractFeatures(self,X, sample_rate):
        X = X.T

        # short term fourier transform
        stft = np.abs(librosa.stft(X))

        # mfcc and delta
        mfcc_features = librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=40)
        
        mfccs = np.mean(mfcc_features.T,axis=0)
            
        deltas = np.mean(librosa.feature.delta(mfcc_features.T),axis=0)
        
        # chroma
        chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T,axis=0)

#         tonnetz = np.mean(librosa.feature.tonnetz(y=librosa.effects.harmonic(X), sr=sample_rate).T,axis=0)
        return mfccs,deltas,chroma,mel,contrast

    def extract(self, path):
        for i, file in enumerate(os.listdir(path)):        
            if file.endswith(".TextGrid"): 
                wav_path = os.path.join(path, os.path.splitext(file)[0] + '.wav')
                if os.path.isfile(wav_path): 
                    sig, sample_rate = self.__loadWav(wav_path)
                    
                    textgrid_path = os.path.join(path, file)
                    textgrid = tg.TextGrid.fromFile(textgrid_path)

                    humorous_tier = textgrid[1]
                    for interval in humorous_tier: #humorous tier
                        if interval.mark in self.marks:
                            start = int(interval.minTime * 1000)
                            end = int(interval.maxTime * 1000)
                            trimmed_sig = sig[start:end]
                            label = self.marks.index(interval.mark)
                            
                            mfccs, deltas, chroma, mel, contrast = self.__extractFeatures(trimmed_sig, sample_rate)
                            ext_features = np.hstack([mfccs,deltas,chroma,mel,contrast])
                            self.features = np.vstack([self.features,ext_features])
                            
                            self.labels = np.append(self.labels, label)
                            
            
        return np.array(self.features), np.array(self.labels, dtype = np.int)

    def saveNumpy(self, path):
        print(self.labels.shape,self.features.shape)
        np.save(os.path.join(path, 'feat.npy'), self.features)
        np.save(os.path.join(path, 'label.npy'), self.labels)

In [13]:
extractor = FeatureExtractor(['H', 'N'])

extractor.extract('data/')

(array([[-574.19048652,  122.13885795,   25.8610535 , ...,   17.1445783 ,
           16.24557289,   18.24968235],
        [-517.54458032,  130.55309811,   -7.09663712, ...,   16.85812776,
           17.53970187,   19.61186726],
        [-525.63052949,  119.66464735,  -42.07406394, ...,   16.20020704,
           18.75778593,   22.73578931],
        ...,
        [-450.51683619,  172.94091484,   11.55364805, ...,   21.974382  ,
           16.99298938,   21.65583656],
        [-557.33204163,  127.62128152,   18.98406461, ...,   15.48677187,
           18.09035815,   19.24012012],
        [-526.25698228,  130.93894725,    1.58692943, ...,   18.13603571,
           17.10329408,   21.9967427 ]]),
 array([0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 0, 0, 0, 1, 0, 0, 1,
        0, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 1, 1, 1, 1,
        1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 0,
        0, 1, 1, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 0, 0,
     

In [14]:
extractor.saveNumpy('data/')

(222,) (222, 227)
