mfcc: 人の感知特性を考慮したcepstrum特徴

In [1]:
import librosa
import numpy as np
import glob

def mfcc(origin_data):
    D = np.abs(librosa.stft(origin_data))
    D_dB = librosa.amplitude_to_db(D, ref=np.max)

    # メルスペクトログラムを算出
    S = librosa.feature.melspectrogram(S=D, sr=sampling_rate)
    S_dB = librosa.amplitude_to_db(S, ref=np.max)

    # MFCCを算出
    mfcc = librosa.feature.mfcc(S=S_dB, n_mfcc=20, dct_type=3)
    return mfcc.T

def lpc(origin_data, num_win, winsz = 2048, hopsz = 512):
    arr2d_win = np.array([])
    start = 0
    end = winsz
    for i in range(num_win):
        arr_win = origin_data[start:end]
        if len(arr_win) < 2048:
            zero_padding = np.zeros(2048 - len(arr_win))
    #         print('padding_size: ', len(zero_padding))
            arr_win = np.append(arr_win, zero_padding)
    #     print(len(arr_win))
        arr2d_win = np.append(arr2d_win, arr_win)
        start += hopsz
        end += hopsz
    arr2d_win_ = arr2d_win.reshape((-1, 2048))
    lpc_output = librosa.lpc(arr2d_win_, order=16)
    return lpc_output

In [3]:
paths = sorted(
    glob.glob('../data/voice-stat/*/*_00*') + 
    glob.glob('../data/voice-stat/*/*_01*') + 
    glob.glob('../data/voice-stat/*/*_020*')
)

In [4]:
data_mfcc = np.empty((0, 20))
data_lpc = np.empty((0, 17))
labels = np.empty(0)

# i = 0 to 59
for i, path in enumerate(paths):
    if i %10 == 0:
        print(f'i = {i}')
    data_origin, sampling_rate = librosa.load(path)
    features_mfcc = mfcc(data_origin)
    features_lpc = lpc(data_origin, features_mfcc.shape[0])
    
    data_mfcc = np.vstack((data_mfcc, features_mfcc))
    data_lpc = np.vstack((data_lpc, features_lpc))
    
    labels = np.append(labels, [i//20]*features_lpc.shape[0])

i = 0
i = 10
i = 20
i = 30
i = 40
i = 50


In [5]:
import pandas as pd
df_mfcc = pd.DataFrame(data_mfcc)
df_lpc = pd.DataFrame(data_lpc)
df_mfcc['label'] = df_lpc['label'] = labels

In [6]:
df_mfcc.to_csv('../data/csv/mfcc.csv')
df_lpc.to_csv('../data/csv/lpc.csv')

In [8]:
from sklearn import svm
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score

In [10]:
X_mfcc = np.array(df_mfcc.iloc[:, :19])
y_mfcc = np.array(df_mfcc.iloc[:, 20])

X_tr, X_ts, y_tr, y_ts = train_test_split(X_mfcc, y_mfcc, test_size=0.2, random_state=0)

param_grid = {'C': [0.1, 1, 10, 100, 1000],  
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 
              'kernel': ['rbf']}
print('グリッドサーチの範囲\n', param_grid, '\n')
clf = GridSearchCV(svm.SVC(), param_grid, n_jobs = -1)
clf.fit(X_tr, y_tr)
print('最適なパラメータ\nC_opt: {}, gamma_opt: {}\n'.format(clf.best_params_['C'], clf.best_params_['gamma']))
y_ts_pred = clf.predict(X_ts)
print('best accuracy\n', accuracy_score(y_ts_pred, y_ts))

グリッドサーチの範囲
 {'C': [0.1, 1, 10, 100, 1000], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 'kernel': ['rbf']} 

最適なパラメータ
C_opt: 10, gamma_opt: 0.0001

best accuracy
 0.7975966069745523


In [14]:
X_lpc = np.array(df_lpc.iloc[:, :16])
y_lpc = np.array(df_lpc.iloc[:, 17])
X_tr, X_ts, y_tr, y_ts = train_test_split(X_lpc, y_lpc, test_size=0.2, random_state=0)

param_grid = {'C': [0.1, 1, 10, 100, 1000],  
              'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 
              'kernel': ['rbf']}
print('グリッドサーチの範囲\n', param_grid, '\n')
clf = GridSearchCV(svm.SVC(), param_grid, n_jobs = -1)
clf.fit(X_tr, y_tr)
print('最適なパラメータ\nC_opt: {}, gamma_opt: {}\n'.format(clf.best_params_['C'], clf.best_params_['gamma']))
y_ts_pred = clf.predict(X_ts)
print('best accuracy\n', accuracy_score(y_ts_pred, y_ts))

グリッドサーチの範囲
 {'C': [0.1, 1, 10, 100, 1000], 'gamma': [1, 0.1, 0.01, 0.001, 0.0001], 'kernel': ['rbf']} 

最適なパラメータ
C_opt: 100, gamma_opt: 1

best accuracy
 0.7834590009425071


# 参考

[librosa.feature.mfcc¶](https://librosa.org/doc/main/generated/librosa.feature.mfcc.html)

[librosa.lpc¶](https://librosa.org/doc/main/generated/librosa.lpc.html)

[MFCC（メル周波数ケプストラム係数）入門](https://qiita.com/tmtakashi_dist/items/eecb705ea48260db0b62)

In [62]:
# feature * num_data * num_person * num_win *  size_float32
17*20*3*452*4/1000/1000 

1.84416