## 실행 X

In [None]:
import os
import librosa
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import KernelPCA
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC

import numpy as np
import matplotlib.pyplot as plt

In [None]:
# LFCC feature extraction function
def compute_lfcc(y, sr, n_lfcc=15, n_fft=2048, hop_length=512):
    """Return cepstral coefficients taken from the log power STFT of ``y``.

    The power spectrogram is log-compressed (with a 1e-6 offset so that
    zero-energy cells stay finite) and then projected onto the first
    ``n_lfcc`` cosine basis vectors of an unnormalised DCT-II, applied along
    the first axis of the spectrogram.

    Args:
        y: Audio signal handed to ``librosa.stft``.
        sr: Sampling rate. Not used by the computation; kept in the
            signature so the call mirrors ``compute_cqcc``.
        n_lfcc: Number of cosine basis vectors (output rows).
        n_fft: FFT size forwarded to ``librosa.stft``.
        hop_length: Hop length forwarded to ``librosa.stft``.

    Returns:
        Array whose first dimension is ``n_lfcc`` and whose second dimension
        matches the second dimension of the spectrogram.
    """
    power_spec = np.abs(librosa.stft(y, n_fft=n_fft, hop_length=hop_length))**2
    log_power = np.log(power_spec + 1e-6)

    # Tuple unpacking doubles as a check that the spectrogram is 2-D.
    num_bins, _num_frames = log_power.shape

    bin_index = np.arange(num_bins)
    coeff_index = np.arange(n_lfcc).reshape(-1, 1)
    # Row k of the basis is cos(pi * k * (2n + 1) / (2N)) over bins n.
    cosine_basis = np.cos(np.pi * coeff_index * (2*bin_index + 1) / (2 * num_bins))

    return np.dot(cosine_basis, log_power)

In [None]:
# CQCC feature extraction function
def compute_cqcc(y, sr, n_cqcc=15, hop_length=512):
    """Return cepstral coefficients taken from the log constant-Q magnitude.

    The magnitude of an 84-bin, 12-bins-per-octave constant-Q transform is
    log-compressed (with a 1e-6 offset so that zero-magnitude cells stay
    finite) and then projected onto the first ``n_cqcc`` cosine basis vectors
    of an unnormalised DCT-II, applied along the first axis of the transform.

    Args:
        y: Audio signal handed to ``librosa.cqt``.
        sr: Sampling rate forwarded to ``librosa.cqt``.
        n_cqcc: Number of cosine basis vectors (output rows).
        hop_length: Hop length forwarded to ``librosa.cqt``.

    Returns:
        Array whose first dimension is ``n_cqcc`` and whose second dimension
        matches the second dimension of the constant-Q transform.
    """
    cq_magnitude = np.abs(
        librosa.cqt(y=y, sr=sr, n_bins=84, bins_per_octave=12, hop_length=hop_length)
    )
    log_magnitude = np.log(cq_magnitude + 1e-6)

    # Tuple unpacking doubles as a check that the transform is 2-D.
    num_bins, _num_frames = log_magnitude.shape

    bin_index = np.arange(num_bins)
    coeff_index = np.arange(n_cqcc).reshape(-1, 1)
    # Row k of the basis is cos(pi * k * (2n + 1) / (2N)) over bins n.
    cosine_basis = np.cos(np.pi * coeff_index * (2*bin_index + 1) / (2 * num_bins))

    return np.dot(cosine_basis, log_magnitude)

In [None]:
# preprocessing function
def extract_features_from_path(wav_path):
    """Load an audio file and summarise it as one flat feature vector.

    The file is loaded with ``librosa.load`` at 16 kHz. Several time-frequency
    representations are computed, and each is reduced over its second axis
    to a mean vector and a standard-deviation vector. The pieces are
    concatenated in this fixed order:

        MFCC, MFCC delta, MFCC delta-delta, liftered MFCC, CQT magnitude,
        log-mel spectrogram, STFT magnitude, LFCC (+ delta, delta-delta),
        CQCC (+ delta, delta-delta), chroma, spectral contrast, tonnetz.

    Args:
        wav_path: Path of the audio file, passed to ``librosa.load``.

    Returns:
        1-D numpy array holding the concatenated statistics.
    """
    signal, rate = librosa.load(wav_path, sr=16000)

    # MFCC family.
    mfcc = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=30)
    mfcc_d1 = librosa.feature.delta(mfcc)
    mfcc_d2 = librosa.feature.delta(mfcc, order=2)
    mfcc_stack = np.concatenate((mfcc, mfcc_d1, mfcc_d2), axis=0)
    mfcc_liftered = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=30, lifter=22)

    # Harmonic / timbral descriptors.
    chroma = librosa.feature.chroma_stft(y=signal, sr=rate)
    contrast = librosa.feature.spectral_contrast(y=signal, sr=rate)
    tonnetz = librosa.feature.tonnetz(y=signal, sr=rate)

    # Cepstra over the log power STFT.
    lfcc = compute_lfcc(signal, rate)
    lfcc_d1 = librosa.feature.delta(lfcc)
    lfcc_d2 = librosa.feature.delta(lfcc, order=2)

    # Raw spectral magnitudes.
    cqt_mag = np.abs(librosa.cqt(y=signal, sr=rate, n_bins=84, bins_per_octave=12))
    mel_power = librosa.feature.melspectrogram(
        y=signal, sr=rate, n_mels=64, hop_length=512
    )
    log_mel = np.log(mel_power + 1e-6)  # offset keeps log finite on silence
    stft_mag = np.abs(librosa.stft(signal, n_fft=2048, hop_length=512))

    # Cepstra over the log constant-Q magnitude.
    cqcc = compute_cqcc(signal, rate)
    cqcc_d1 = librosa.feature.delta(cqcc)
    cqcc_d2 = librosa.feature.delta(cqcc, order=2)

    # NOTE(review): the first mean is taken over the stacked
    # MFCC + delta + delta-delta matrix, while the matching std covers the
    # plain MFCC only, and the delta / delta-delta means are appended again
    # below. The duplication is kept on purpose so the vector layout stays
    # unchanged for existing consumers -- confirm whether it is intended.
    pieces = [np.mean(mfcc_stack, axis=1), np.std(mfcc, axis=1)]

    # Every remaining representation contributes (mean, std) in this order.
    summarised = (
        mfcc_d1, mfcc_d2, mfcc_liftered,
        cqt_mag, log_mel, stft_mag,
        lfcc, lfcc_d1, lfcc_d2,
        cqcc, cqcc_d1, cqcc_d2,
        chroma, contrast, tonnetz,
    )
    for frames in summarised:
        pieces.append(np.mean(frames, axis=1))
        pieces.append(np.std(frames, axis=1))

    # Combine everything into the final feature vector.
    return np.concatenate(pieces)