# Audio Feature Extraction

## 1. Importing Libraries

In [1]:
import os
from tqdm import tqdm
import pandas as pd
import numpy as np
from scipy.stats import skew, kurtosis
import librosa

import plotly.express as px
from plotly.subplots import make_subplots
import plotly.graph_objects as go

## 2. Get All Audio Files

In [2]:
# Walk the dataset tree <path>/<tone_type>/<subtechnique>/<audio file> and
# record one row per audio file.
data = []
path = '../data/guitar/wav'

# Only directories are tone types. A stray file at this level (for example a
# .DS_Store created by a file browser) would otherwise make the os.listdir()
# call below raise NotADirectoryError.
tone_types = sorted(
    entry for entry in os.listdir(path)
    if os.path.isdir(os.path.join(path, entry))
)
for tone_type in tone_types:
    subpath = os.path.join(path, tone_type)
    subtechniques = sorted(
        entry for entry in os.listdir(subpath)
        if os.path.isdir(os.path.join(subpath, entry))
    )
    for subtechnique in subtechniques:
        sub2path = os.path.join(subpath, subtechnique)
        # The coarse technique label is the first underscore-separated token
        # of the subtechnique folder name.
        technique = subtechnique.split('_')[0]
        # Keep .wav files only, so that non-audio files are not recorded as
        # audio paths and fail much later during feature extraction.
        audio_files = sorted(
            entry for entry in os.listdir(sub2path)
            if entry.lower().endswith('.wav')
        )
        for audio_file in audio_files:
            audio_path = os.path.join(sub2path, audio_file)
            data.append([technique, subtechnique, tone_type, audio_path])

columns = ['technique', 'subtechnique', 'tone_type', 'audio_path']
df = pd.DataFrame(data, columns=columns)
print(df.shape)
df

(6447, 4)


Unnamed: 0,technique,subtechnique,tone_type,audio_path
0,bending,bending_up_down_half,1,../data/guitar/wav/1/bending_up_down_half/bend...
1,bending,bending_up_down_half,1,../data/guitar/wav/1/bending_up_down_half/bend...
2,bending,bending_up_down_half,1,../data/guitar/wav/1/bending_up_down_half/bend...
3,bending,bending_up_down_half,1,../data/guitar/wav/1/bending_up_down_half/bend...
4,bending,bending_up_down_half,1,../data/guitar/wav/1/bending_up_down_half/bend...
...,...,...,...,...
6442,trill,trill,7,../data/guitar/wav/7/trill/trill_87.wav
6443,trill,trill,7,../data/guitar/wav/7/trill/trill_88.wav
6444,trill,trill,7,../data/guitar/wav/7/trill/trill_89.wav
6445,trill,trill,7,../data/guitar/wav/7/trill/trill_9.wav


## 3. Reconstructing Dataset

In [3]:
# Remove every "normal_*" subtechnique and the "mute" technique, renumbering
# the rows after each filter.
is_normal = df['subtechnique'].str.contains('normal_')
df2 = df[~is_normal].reset_index(drop=True)
df2 = df2[df2['technique'] != 'mute'].reset_index(drop=True)

# The half-step and whole-step variants of a movement share one technique label.
merged_labels = {
    'bending_up_down': ['bending_up_down_half', 'bending_up_down_whole'],
    'bending_up': ['bending_up_half', 'bending_up_whole'],
    'slide_down': ['slide_half_step_down', 'slide_whole_step_down'],
    'slide_up': ['slide_half_step_up', 'slide_whole_step_up'],
}
for label, variants in merged_labels.items():
    df2.loc[df2['subtechnique'].isin(variants), 'technique'] = label

print(df2.shape)
df2

(4935, 4)


Unnamed: 0,technique,subtechnique,tone_type,audio_path
0,bending_up_down,bending_up_down_half,1,../data/guitar/wav/1/bending_up_down_half/bend...
1,bending_up_down,bending_up_down_half,1,../data/guitar/wav/1/bending_up_down_half/bend...
2,bending_up_down,bending_up_down_half,1,../data/guitar/wav/1/bending_up_down_half/bend...
3,bending_up_down,bending_up_down_half,1,../data/guitar/wav/1/bending_up_down_half/bend...
4,bending_up_down,bending_up_down_half,1,../data/guitar/wav/1/bending_up_down_half/bend...
...,...,...,...,...
4930,trill,trill,7,../data/guitar/wav/7/trill/trill_87.wav
4931,trill,trill,7,../data/guitar/wav/7/trill/trill_88.wav
4932,trill,trill,7,../data/guitar/wav/7/trill/trill_89.wav
4933,trill,trill,7,../data/guitar/wav/7/trill/trill_9.wav


## 4. Data Distribution

In [4]:
def _count_table(series):
    """Count the distinct values of `series`.

    Returns a DataFrame with two columns: 'index' (the distinct values, most
    frequent first) and a column named after the series (the counts).

    The column names are set explicitly because the result of
    `value_counts().reset_index()` is named differently from pandas 2.0 on,
    and the top-level `pd.value_counts` function is deprecated.
    """
    return series.value_counts().rename_axis('index').reset_index(name=series.name)


tone_count = _count_table(df2['tone_type'])
technique_count = _count_table(df2['technique'])
subtechnique_count = _count_table(df2['subtechnique'])

# One bar chart per annotation level, stacked vertically.
fig = make_subplots(rows=3, cols=1)
fig.add_trace(go.Bar(x=tone_count['index'], y=tone_count['tone_type'], name='Tone Type'), row=1, col=1)
fig.add_trace(go.Bar(x=technique_count['index'], y=technique_count['technique'], name='Technique'), row=2, col=1)
fig.add_trace(go.Bar(x=subtechnique_count['index'], y=subtechnique_count['subtechnique'], name='Subtechnique'), row=3, col=1)
fig.update_layout(height=800, width=600)
fig.show()

## 5. Define Function for Feature Extraction

In [5]:
def _mean_std(matrix):
    """Return the per-row means followed by the per-row standard deviations."""
    return np.hstack((np.mean(matrix, axis=1), np.std(matrix, axis=1)))


def get_features(path, sr=22050, duration=4.0, n_mfcc=13):
    """Summarise an audio file as MFCC statistics.

    Parameters
    ----------
    path : str
        Audio file to load.
    sr : int, default 22050
        Sample rate passed to `librosa.load`.
    duration : float, default 4.0
        Duration in seconds passed to `librosa.load`.
    n_mfcc : int, default 13
        Number of MFCCs to compute.

    Returns
    -------
    numpy.ndarray
        1-D vector of length ``6 * n_mfcc`` laid out as
        [mean MFCC, std MFCC, mean delta, std delta, mean delta2, std delta2],
        with each statistic taken along axis 1 of the corresponding matrix.
    """
    signal, sr = librosa.load(path, sr=sr, duration=duration)

    mfcc = librosa.feature.mfcc(y=signal, sr=sr, n_mfcc=n_mfcc)
    # First- and second-order deltas are both derived from the MFCC matrix.
    delta_mfcc = librosa.feature.delta(mfcc)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)

    extracted_features = np.hstack((
        _mean_std(mfcc),
        _mean_std(delta_mfcc),
        _mean_std(delta2_mfcc),
    ))
    return extracted_features

## 6. Get Audio Features for GPT Audio Data

In [6]:
# Extract the feature vector of every audio file listed in df2 and store it
# together with the file's annotations, one row per file.
gpt_data = []
for row in tqdm(df2.itertuples(index=False), total=len(df2)):
    annotation = [row.technique, row.subtechnique, row.tone_type, row.audio_path]
    features = get_features(row.audio_path)
    # Build a plain list instead of np.append(features, annotation): a NumPy
    # array has a single dtype, so mixing floats with the string annotations
    # would turn every feature value into a string.
    gpt_data.append([*features.tolist(), *annotation])
print('Done!!!')

100%|██████████| 4935/4935 [33:04<00:00,  2.49it/s]

Done!!!





## 7. Define Column Names to Create the DataFrame

In [7]:
# Column names follow the layout of the vector returned by get_features:
# for each feature family, 13 means followed by 13 standard deviations,
# then the four annotation columns.
names = ['mfcc', 'delta_mfcc', 'delta2_mfcc']
stats = ['mean', 'std']
file_desc = ['technique', 'subtechnique', 'tone_type', 'audio_path']
n_coefficients = 13  # must match the number of MFCCs computed in get_features

feature_columns = [
    f'{stat}_{name}_{number}'
    for name in names
    for stat in stats
    for number in range(1, n_coefficients + 1)
]

# Build the array once rather than growing it with np.append in a loop.
columns = np.array(feature_columns + file_desc)
columns.shape

(82,)

## 8. Convert Extracted Features (List) to DataFrame

In [8]:
# Turn the collected rows (one per audio file) into a DataFrame labelled with
# the column names built in the previous section, then report its shape.
gpt_df = pd.DataFrame(gpt_data, columns=columns)
print(gpt_df.shape)

(4935, 82)


## 9. Save Dataframe to CSV File

In [9]:
# Persist the feature table; index=False keeps the row index out of the file.
gpt_df.to_csv('../data/gpt.csv', index=False)

## 10. Try to Load CSV File

In [2]:
# Read the saved CSV back in and display only its first 78 columns, i.e. the
# mean/std feature columns without the four trailing annotation columns.
gpt = pd.read_csv('../data/gpt.csv')
gpt.iloc[:,:78]

Unnamed: 0,mean_mfcc_1,mean_mfcc_2,mean_mfcc_3,mean_mfcc_4,mean_mfcc_5,mean_mfcc_6,mean_mfcc_7,mean_mfcc_8,mean_mfcc_9,mean_mfcc_10,...,std_delta2_mfcc_4,std_delta2_mfcc_5,std_delta2_mfcc_6,std_delta2_mfcc_7,std_delta2_mfcc_8,std_delta2_mfcc_9,std_delta2_mfcc_10,std_delta2_mfcc_11,std_delta2_mfcc_12,std_delta2_mfcc_13
0,-355.03528,50.418660,-31.403925,50.348953,-30.673700,1.969854,-22.432991,-18.882349,-19.687302,-17.108780,...,0.779445,0.426551,0.623370,0.449686,0.595603,0.608002,0.556226,0.422851,0.376067,0.295037
1,-409.34760,54.172573,-14.259913,34.271140,-19.298210,-1.316164,-22.821796,-26.808441,-35.983765,-20.753984,...,1.406769,0.646310,0.680021,0.575558,0.989656,0.586629,0.618020,0.486782,0.560354,0.306096
2,-437.37173,51.660965,-6.854233,29.953080,-18.832767,-4.724772,-27.711292,-29.034687,-36.362514,-12.233845,...,1.335319,0.912139,0.631745,0.650700,0.940266,0.749098,0.570480,0.602685,0.513172,0.474988
3,-438.06467,49.455948,-12.179679,30.181612,-20.404844,-2.694792,-28.129316,-27.830496,-26.912025,-4.806398,...,1.126964,0.727060,0.521730,0.813947,1.064398,0.784792,0.467529,0.632194,0.408578,0.670854
4,-421.00162,57.186110,-10.938198,24.475685,-18.420061,-7.810924,-27.905037,-26.687890,-18.443436,-3.099151,...,1.092148,0.546977,0.885436,0.845619,0.938890,0.515301,0.455240,0.450919,0.419333,0.728170
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4930,-448.43590,87.491410,-10.885900,3.775053,-8.661828,-28.545597,-11.102462,13.196578,21.780603,10.927249,...,1.316339,1.179968,1.166556,1.612248,1.332319,1.106352,0.950605,0.668427,1.002614,1.238254
4931,-452.21283,77.595474,-12.620891,8.281128,-5.475756,-16.596151,1.931262,14.497064,22.093060,8.834344,...,1.260314,1.043215,0.960883,1.025139,0.779285,0.826061,0.744555,0.620688,1.188630,0.863979
4932,-471.79144,76.883736,-17.900557,3.402666,-9.468511,-14.717384,10.544386,21.593449,19.314589,0.899425,...,1.270198,1.194653,1.027984,1.106710,1.157028,0.846509,0.664322,0.961321,1.070644,0.602715
4933,-381.48682,123.817200,2.646233,15.896898,16.420760,-2.155163,-0.675577,-4.677337,-4.736665,-5.338490,...,1.133364,0.484727,0.506073,0.591242,0.609270,0.436886,0.329715,0.406841,0.463078,0.481059


In [3]:
# Bar chart of the number of samples per technique in the reloaded data.
# The count table's columns are named explicitly ('index' for the technique,
# 'technique' for the count) because value_counts().reset_index() produces
# different column names from pandas 2.0 on, and pd.value_counts is deprecated.
px.bar(
    gpt['technique'].value_counts().rename_axis('index').reset_index(name='technique'),
    x='index',
    y='technique',
    width=500,
    height=400,
)