# Audio Feature Extraction

## 1. Import Libraries

In [1]:
import os
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis
import librosa

## 2. Define the Feature Extraction Function

In [2]:
def _summary_stats(coeffs):
    """Reduce a 2-D coefficient matrix along axis 1 into one flat vector.

    The result is laid out statistic-major: every row's mean first, then every
    row's standard deviation, maximum, median, minimum, skewness and kurtosis.
    """
    return np.hstack((
        np.mean(coeffs, axis=1),
        np.std(coeffs, axis=1),
        np.max(coeffs, axis=1),
        np.median(coeffs, axis=1),
        np.min(coeffs, axis=1),
        skew(coeffs, axis=1),
        kurtosis(coeffs, axis=1),
    ))


def get_features(path, n_mfcc=13):
    """Extract summary statistics of MFCCs and their deltas from an audio file.

    The file is loaded with ``librosa.load`` using its default settings, then
    the MFCC matrix and its first- and second-order deltas are computed. Each
    of the three matrices is summarised with seven statistics per coefficient
    (mean, std, max, median, min, skew, kurtosis).

    Parameters
    ----------
    path : str
        Location of the audio file to analyse.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute (default 13).

    Returns
    -------
    numpy.ndarray
        1-D vector of length ``3 * 7 * n_mfcc`` (273 by default), ordered as
        MFCC statistics, delta statistics, then delta-delta statistics.

    Notes
    -----
    Errors raised by librosa (for example on an unreadable file) are not
    caught here and propagate to the caller.
    """
    signal, sr = librosa.load(path)

    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)
    delta_mfcc = librosa.feature.delta(mfcc)
    # order=2 differentiates the MFCCs twice (delta-delta).
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)

    # Same statistic block for each matrix, concatenated in a fixed order.
    return np.hstack([
        _summary_stats(matrix)
        for matrix in (mfcc, delta_mfcc, delta2_mfcc)
    ])

## 3. Get Audio Features for GPT Audio Data

In [3]:
# Root folder of the audio clips. Expected layout:
#   <dataset_path>/<tone_type>/<subtechnique>/<audio file>
# where the subtechnique folder name starts with "<technique>_".
# dataset_path = '../data/wav'
dataset_path = '../data/wav_split'

data = []
for root, _, files in os.walk(dataset_path):
    # Skip the dataset root itself and any folder that holds no files
    # (e.g. the intermediate tone-type folders). Compare by value: identity
    # (`is`) between two equal strings is an implementation detail.
    if root == dataset_path or not files:
        continue

    # Derive the labels from the path itself instead of remembering the last
    # file-less folder seen, so an empty sibling folder cannot mislabel rows.
    subtechnique = os.path.basename(root)
    tone_type = os.path.basename(os.path.dirname(root))
    technique = subtechnique.split('_')[0]

    for file in files:
        file_path = os.path.join(root, file)
        features = get_features(file_path)
        # Keep the numeric features as numbers: appending the string labels
        # with np.append would coerce the whole row to strings.
        data.append([*features.tolist(), tone_type, technique, subtechnique, file_path])
print('Done!!!')

Done!!!


## 4. Define Column Names for the DataFrame

In [4]:
names = ['mfcc', 'delta_mfcc', 'delta2_mfcc']
stats = ['mean', 'std', 'max', 'median', 'min', 'skew', 'kurtosis']
file_desc = ['tone_type', 'technique', 'sub_technique', 'file_path']

# Labels are generated in the nesting order feature family -> statistic ->
# coefficient index (1-based, 13 coefficients), followed by the four
# descriptive fields. The array is built once rather than grown with
# np.append, which re-allocates on every call.
columns = np.array(
    [
        f'{stat}_{name}_{i + 1}'
        for name in names
        for stat in stats
        for i in range(13)
    ]
    + file_desc
)
columns.shape

(277,)

## 5. Convert Extracted Features (List) to a DataFrame

In [5]:
# Build the table in one shot: every entry of `data` becomes a row and the
# labels in `columns` name its fields.
df = pd.DataFrame(data=data, columns=columns)
df.shape

(7657, 277)

## 6. Save the DataFrame to a CSV File

In [6]:
# Write the feature table to disk without the positional index column.
# Alternative target: df.to_csv('../data/gpt.csv', index=False)
df.to_csv(path_or_buf='../data/gpt_split.csv', index=False)

## 7. Load CSV File

In [7]:
# Read the saved feature table back from disk and display it.
# Alternative source: gpt = pd.read_csv('../data/gpt.csv')
gpt = pd.read_csv(filepath_or_buffer='../data/gpt_split.csv')
gpt

Unnamed: 0,mean_mfcc_1,mean_mfcc_2,mean_mfcc_3,mean_mfcc_4,mean_mfcc_5,mean_mfcc_6,mean_mfcc_7,mean_mfcc_8,mean_mfcc_9,mean_mfcc_10,...,kurtosis_delta2_mfcc_8,kurtosis_delta2_mfcc_9,kurtosis_delta2_mfcc_10,kurtosis_delta2_mfcc_11,kurtosis_delta2_mfcc_12,kurtosis_delta2_mfcc_13,tone_type,technique,sub_technique,file_path
0,-462.77118,126.876144,-30.820320,21.706020,16.523281,-25.490458,-13.736227,-27.322334,-13.508459,-7.359414,...,-0.550029,-1.649426,0.026621,1.972903,-1.258069,-1.819546,6,hamming,hamming_half_step,../data/wav_split/6/hamming_half_step/hamming_...
1,-530.52490,93.725330,-39.037434,-4.479851,-20.150124,-17.046190,25.775227,51.612923,40.953102,-8.992085,...,-0.417750,-0.315448,0.676565,0.572439,-1.770167,-1.901417,6,hamming,hamming_half_step,../data/wav_split/6/hamming_half_step/hamming_...
2,-438.21292,141.950060,-9.433838,7.212989,2.793774,-21.534147,-6.310059,-16.381878,-18.221027,-13.947206,...,-1.832178,-1.765308,-1.804162,-0.630712,0.099910,0.576993,6,hamming,hamming_half_step,../data/wav_split/6/hamming_half_step/hamming_...
3,-428.30045,128.527250,0.124231,7.482684,3.537882,-24.137173,-13.788621,-22.357704,-23.896505,-14.273493,...,-1.658522,1.790880,-1.846894,-1.876913,-1.890087,-0.089512,6,hamming,hamming_half_step,../data/wav_split/6/hamming_half_step/hamming_...
4,-498.30170,145.041690,-10.504293,7.761322,2.013798,-26.044313,-13.251748,-23.543161,-14.636507,-5.031464,...,-1.887398,2.807289,-1.624316,-1.834614,-1.919023,-1.820262,6,hamming,hamming_half_step,../data/wav_split/6/hamming_half_step/hamming_...
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7652,-192.05267,36.373207,-41.765950,52.216854,-16.862871,9.718247,-14.838839,-14.813597,-33.602260,-34.855343,...,-1.659349,2.008721,-1.652494,-0.414528,0.760430,0.724007,3,hamming,hamming_whole_step,../data/wav_split/3/hamming_whole_step/hamming...
7653,-208.97202,25.144392,-45.765255,58.247990,-16.275730,-7.294288,-30.302073,-21.640614,-45.537685,-28.055300,...,-0.117636,-0.679620,-1.752346,2.158094,-1.688273,1.857633,3,hamming,hamming_whole_step,../data/wav_split/3/hamming_whole_step/hamming...
7654,-249.35416,-0.233360,-52.419010,48.536217,-36.656480,-31.731333,-31.774345,-5.080925,5.155398,16.961800,...,0.140658,-1.681437,-1.768920,-1.480259,-1.817897,-1.906327,3,hamming,hamming_whole_step,../data/wav_split/3/hamming_whole_step/hamming...
7655,-175.63824,32.764416,-56.552536,48.904472,-23.516140,13.669778,-22.247383,-14.150379,-26.149586,-27.904728,...,-0.699870,0.646151,-1.754304,2.604471,-1.756243,-1.702046,3,hamming,hamming_whole_step,../data/wav_split/3/hamming_whole_step/hamming...
