In [1]:
import src.utils

In [4]:
# Example 1: short-term feature extraction
from pyAudioAnalysis import ShortTermFeatures as aF
from pyAudioAnalysis import audioBasicIO as aIO 
from pyAudioAnalysis import MidTermFeatures as mTF
from sklearn.svm import SVC
import numpy as np 
import plotly.graph_objs as go 
import plotly
import IPython
import os


In [5]:
# Id handed to the project helper to pick the recording analysed below
# (presumably resolves to the path of an mp3 file -- confirm in src.utils).
selected_sound_id = 167
mp3_file = src.utils.get_sel_mp3_fname(selected_sound_id)

In [None]:

# Short-term feature extraction for the recording in `mp3_file`:
# load the audio, extract frame-level features, list their names and plot
# one of them over time.

# read audio data from file
# (returns sampling freq and signal as a numpy array)
fs, s = aIO.read_audio_file(mp3_file)
# play the audio file in the notebook:
IPython.display.display(IPython.display.Audio(mp3_file))
# print duration in seconds:
duration = len(s) / float(fs)
print(f'duration = {duration} seconds')
# extract short-term features using 150 msec non-overlapping windows
# (window and step are given in seconds here and converted to samples below)
win, step = 0.150, 0.150
[f, fn] = aF.feature_extraction(s, fs, int(fs * win),
                                int(fs * step))
# f is indexed as (feature, frame); fn holds one name per feature row
print(f'{f.shape[1]} frames, {f.shape[0]} short-term features')
print('Feature names:')
for i, nam in enumerate(fn):
    print(f'{i}:{nam}')
# plot one short-term feature over time
plotted_feature = 'energy_entropy'
# Time axis in seconds, built from the number of extracted frames so that it
# always has exactly one entry per frame (deriving it from `duration` can end
# up one element short or long because of rounding).
time = np.arange(f.shape[1]) * step
# get the row of the feature matrix that belongs to the plotted feature
feature_values = f[fn.index(plotted_feature), :]
mylayout = go.Layout(yaxis=dict(title=f"frame {plotted_feature} value"),
                     xaxis=dict(title="time (sec)"))
plotly.offline.iplot(go.Figure(data=[go.Scatter(x=time,
                                                y=feature_values)],
                               layout=mylayout))

In [46]:
# Plot the short-term 'mfcc_1' feature over time.
# Relies on `f`, `fn` and `time` produced by the short-term extraction cell.
mfcc_feature = 'mfcc_1'
mfcc_values = f[fn.index(mfcc_feature), :]
# Label the y axis with the feature that is actually plotted (the previous
# "frame energy value" title was a copy-paste leftover).
mylayout = go.Layout(yaxis=dict(title=f"frame {mfcc_feature} value"),
                     xaxis=dict(title="time (sec)"))
plotly.offline.iplot(go.Figure(data=[go.Scatter(x=time,
                                                y=mfcc_values)],
                               layout=mylayout))

In [48]:
# Mid-term (segment-level) feature extraction for the recording in `mp3_file`.

# read audio data from file
# (returns sampling freq and signal as a numpy array)
fs, s = aIO.read_audio_file(mp3_file)
# get mid-term (segment) feature statistics
# and respective short-term features.
# NOTE: mid_feature_extraction lives in MidTermFeatures (imported as `mTF`);
# `aF` is ShortTermFeatures in this notebook and does not provide it.
# Window/step sizes are passed in samples: 1 s mid-term, 50 ms short-term.
mt, st, mt_n = mTF.mid_feature_extraction(s, fs, 1 * fs, 1 * fs,
                                          0.05 * fs, 0.05 * fs)
print(f'signal duration {len(s)/fs} seconds')
# both matrices are indexed as (feature, vector)
print(f'{st.shape[1]} {st.shape[0]}-D short-term feature vectors extracted')
print(f'{mt.shape[1]} {mt.shape[0]}-D segment feature statistic vectors extracted')
print('mid-term feature names')
for i, mi in enumerate(mt_n):
    print(f'{i}:{mi}')

signal duration 274.7820408163265 seconds
5495 68-D short-term feature vectors extracted
275 136-D segment feature statistic vectors extracted
mid-term feature names
0:zcr_mean
1:energy_mean
2:energy_entropy_mean
3:spectral_centroid_mean
4:spectral_spread_mean
5:spectral_entropy_mean
6:spectral_flux_mean
7:spectral_rolloff_mean
8:mfcc_1_mean
9:mfcc_2_mean
10:mfcc_3_mean
11:mfcc_4_mean
12:mfcc_5_mean
13:mfcc_6_mean
14:mfcc_7_mean
15:mfcc_8_mean
16:mfcc_9_mean
17:mfcc_10_mean
18:mfcc_11_mean
19:mfcc_12_mean
20:mfcc_13_mean
21:chroma_1_mean
22:chroma_2_mean
23:chroma_3_mean
24:chroma_4_mean
25:chroma_5_mean
26:chroma_6_mean
27:chroma_7_mean
28:chroma_8_mean
29:chroma_9_mean
30:chroma_10_mean
31:chroma_11_mean
32:chroma_12_mean
33:chroma_std_mean
34:delta zcr_mean
35:delta energy_mean
36:delta energy_entropy_mean
37:delta spectral_centroid_mean
38:delta spectral_spread_mean
39:delta spectral_entropy_mean
40:delta spectral_flux_mean
41:delta spectral_rolloff_mean
42:delta mfcc_1_mean
43:del

In [51]:
# Sanity check: shape of the mid-term matrix, shape of the short-term matrix,
# and the number of mid-term feature names.
(mt.shape, st.shape, len(mt_n))

((136, 275), (68, 5495), 136)

In [53]:
# Sampling frequency and shape of the loaded signal array.
(fs, s.shape)

(44100, (12117888,))

In [6]:
# Display names of the two classes compared in the cells below.
rt, yf = 'Rufous-bellied Thrush', 'Yellow-olive Flatbill'

In [7]:
# Segment-level feature extraction, one directory of recordings per class.
# `dirs` and `class_names` are kept in the same order so that features[k]
# belongs to class_names[k].
dirs = ["./data/sound_rt", "./data/sound_yf"]
class_names = [rt, yf]
# mid-term window/step and short-term window/step handed to the extractor
# (presumably in seconds -- confirm against the pyAudioAnalysis docs)
m_win, m_step = 1, 1
s_win, s_step = 0.1, 0.05
features = []
for d in dirs:
    # one feature matrix per directory; `fn` keeps the feature names returned
    # for the most recently processed directory
    f, files, fn = mTF.directory_feature_extraction(d, m_win, m_step,
                                                    s_win, s_step)
    features += [f]
# each element of `features` is a (samples x segment features) matrix
print(features[0].shape, features[1].shape)
    


Analyzing file 1 of 127: ./data/sound_rt/sound_1142.mp3
Analyzing file 2 of 127: ./data/sound_rt/sound_1196.mp3
Analyzing file 3 of 127: ./data/sound_rt/sound_1197.mp3
Analyzing file 4 of 127: ./data/sound_rt/sound_1207.mp3
Analyzing file 5 of 127: ./data/sound_rt/sound_1239.mp3
Analyzing file 6 of 127: ./data/sound_rt/sound_15459.mp3
Analyzing file 7 of 127: ./data/sound_rt/sound_1551.mp3
Analyzing file 8 of 127: ./data/sound_rt/sound_1552.mp3
Analyzing file 9 of 127: ./data/sound_rt/sound_1553.mp3
Analyzing file 10 of 127: ./data/sound_rt/sound_1554.mp3
Analyzing file 11 of 127: ./data/sound_rt/sound_1555.mp3
Analyzing file 12 of 127: ./data/sound_rt/sound_15712.mp3
Analyzing file 13 of 127: ./data/sound_rt/sound_167.mp3
Analyzing file 14 of 127: ./data/sound_rt/sound_16900.mp3
Analyzing file 15 of 127: ./data/sound_rt/sound_16920.mp3
Analyzing file 16 of 127: ./data/sound_rt/sound_16941.mp3
Analyzing file 17 of 127: ./data/sound_rt/sound_16942.mp3
Analyzing file 18 of 127: ./data/so

In [None]:
# Compare the two classes on a pair of segment-level features.
x_feature, y_feature = 'spectral_centroid_mean', 'energy_entropy_mean'
x_col, y_col = fn.index(x_feature), fn.index(y_feature)
# 2 x n_samples matrices: row 0 -> x feature, row 1 -> y feature
f1 = np.vstack((features[0][:, x_col], features[0][:, y_col]))
f2 = np.vstack((features[1][:, x_col], features[1][:, y_col]))
# one marker trace per class
plots = [go.Scatter(x=points[0, :], y=points[1, :], name=label, mode='markers')
         for points, label in zip((f1, f2), class_names)]
mylayout = go.Layout(xaxis=dict(title=x_feature),
                     yaxis=dict(title=y_feature))
plotly.offline.iplot(go.Figure(data=plots, layout=mylayout))

In [60]:
# Scatter the two classes in the (zcr_mean, mfcc_1_mean) feature plane.
zcr_col = fn.index('zcr_mean')
mfcc_col = fn.index('mfcc_1_mean')
# rows: [zcr_mean, mfcc_1_mean]; columns: samples of the class
f1 = np.vstack((features[0][:, zcr_col], features[0][:, mfcc_col]))
f2 = np.vstack((features[1][:, zcr_col], features[1][:, mfcc_col]))
# plot 2D features, one marker trace per class
trace_first = go.Scatter(x=f1[0, :], y=f1[1, :],
                         name=class_names[0], mode='markers')
trace_second = go.Scatter(x=f2[0, :], y=f2[1, :],
                          name=class_names[1], mode='markers')
plots = [trace_first, trace_second]
mylayout = go.Layout(xaxis=dict(title="zcr_mean"),
                     yaxis=dict(title="mfcc_1_mean"))
plotly.offline.iplot(go.Figure(data=plots, layout=mylayout))

In [None]:
# Scatter-plot the two classes for every pair of segment-level features.
# Only unordered pairs (fi2 > fi1) are drawn: the (b, a) plot is just the
# (a, b) plot with its axes swapped, so iterating ordered pairs rendered every
# figure twice.
# NOTE: for n features this still renders n * (n - 1) / 2 figures, which can
# be a lot for one notebook -- consider restricting `fn` to a subset first.
n_features = len(fn)
for fi1 in range(n_features):
    f_1 = fn[fi1]
    for fi2 in range(fi1 + 1, n_features):
        f_2 = fn[fi2]
        # 2 x n_samples matrices: row 0 -> feature f_1, row 1 -> feature f_2
        f1 = np.array([features[0][:, fi1],
                       features[0][:, fi2]])
        f2 = np.array([features[1][:, fi1],
                       features[1][:, fi2]])
        # plot 2D features, one marker trace per class
        plots = [go.Scatter(x=f1[0, :], y=f1[1, :],
                            name=class_names[0], mode='markers'),
                 go.Scatter(x=f2[0, :], y=f2[1, :],
                            name=class_names[1], mode='markers')]
        mylayout = go.Layout(xaxis=dict(title=f_1),
                             yaxis=dict(title=f_2))
        plotly.offline.iplot(go.Figure(data=plots, layout=mylayout))