# Original Routine

```python

# Index of the highest STFT bin kept for the spectrogram: the configured
# 'fmax_spectra_salsalite' limit scaled by nfft / fs and rounded down.
# Built-in int()/float() are used because the np.int / np.float aliases were
# deprecated in NumPy 1.20 and removed in 1.24.
cutoff_bin = int(np.floor(params['fmax_spectra_salsalite'] * self._nfft / float(self._fs)))
# The statements below read self._cutoff_bin, so the computed value has to be
# stored on the instance as well as in the local name.
self._cutoff_bin = cutoff_bin
# The message has two placeholders; both values are supplied so that a failing
# check reports them instead of raising IndexError while formatting.
assert self._upper_bin <= self._cutoff_bin, 'Upper bin for doa featurei {} is higher than cutoff bin for spectrogram {}!'.format(self._upper_bin, self._cutoff_bin)
# Number of mel bands = number of bins between the lower bin and the cutoff bin.
self._nb_mel_bins = self._cutoff_bin - self._lower_bin

# librosa returns the filterbank as (n_mels, 1 + n_fft // 2); it is transposed so
# that spectra with frequency on their last axis can be right-multiplied by it.
self._mel_wts = librosa.filters.mel(sr=self._fs, n_fft=self._nfft, n_mels=self._nb_mel_bins).T


def spectrogram(audio_input, _nb_frames):
    """Compute one STFT per channel and stack the results.

    Each column of ``audio_input`` is transformed with a Hann window using the
    FFT size, hop length and window length stored on ``self``, and only the
    first ``_nb_frames`` frames of each transform are kept.

    NOTE(review): the body reads ``self`` although it is not a parameter, so
    this only runs where a ``self`` name is otherwise in scope - presumably the
    excerpt was lifted out of a class method; confirm against the original.

    Args:
        audio_input: 2-D array indexed as ``[sample, channel]``.
        _nb_frames: maximum number of STFT frames to keep per channel.

    Returns:
        The per-channel STFTs stacked into one array and fully transposed
        (channel axis last).
    """
    channel_count = audio_input.shape[1]
    n_fft = self._nfft
    per_channel = [
        librosa.core.stft(
            np.asfortranarray(audio_input[:, ch]),
            n_fft=n_fft,
            hop_length=self._hop_len,
            win_length=self._win_len,
            window='hann',
        )[:, :_nb_frames]
        for ch in range(channel_count)
    ]
    return np.array(per_channel).T

def _get_mel_spectrogram(self, linear_spectra):
    """Convert linear spectra into flattened per-channel log-mel features.

    For every channel (last axis of ``linear_spectra``) the squared magnitude
    is projected through ``self._mel_wts`` and converted to decibels with
    ``librosa.power_to_db``.

    Args:
        linear_spectra: 3-D array indexed as ``[frame, bin, channel]``.

    Returns:
        2-D array with one row per frame; within a row the mel bands of
        channel 0 come first, followed by those of channel 1, and so on.
    """
    n_frames = linear_spectra.shape[0]
    n_channels = linear_spectra.shape[-1]
    # Filled directly in (frame, channel, mel) order, so flattening the last
    # two axes yields the channel-major layout without a transpose.
    stacked = np.zeros((n_frames, n_channels, self._nb_mel_bins))
    for ch in range(n_channels):
        power = np.abs(linear_spectra[:, :, ch])**2
        stacked[:, ch, :] = librosa.power_to_db(np.dot(power, self._mel_wts))
    return stacked.reshape((n_frames, -1))

def _get_foa_intensity_vectors(self, linear_spectra, eps=1e-8):
    """Compute mel-projected, energy-normalised intensity vectors.

    Channel 0 (called W in the original code, presumably the omnidirectional
    FOA channel) is combined with the remaining channels: the real part of
    ``conj(W) * channel`` is divided by an energy term and projected through
    ``self._mel_wts``.

    Args:
        linear_spectra: 3-D complex array indexed as ``[frame, bin, channel]``;
            the output reshape requires exactly three channels after channel 0.
        eps: small constant added to the energy term to avoid division by zero.

    Returns:
        2-D array of shape ``(frames, self._nb_mel_bins * 3)``; within a row the
        mel bands of the first non-W channel come first, then the second, then
        the third.

    Side effects:
        If the result contains NaN, a message is printed and ``exit()`` is
        called.
    """
    omni = linear_spectra[:, :, 0]
    directional = linear_spectra[:, :, 1:]

    intensity = np.real(np.conj(omni)[:, :, np.newaxis] * directional)
    omni_power = np.abs(omni)**2
    directional_power = (np.abs(directional)**2).sum(-1)
    energy = eps + (omni_power + directional_power/3.0)

    normalised = intensity / energy[:, :, np.newaxis]
    # Move the bin axis last so it contracts with the filterbank; the product is
    # already laid out as (frame, channel, mel) and can be flattened directly.
    projected = np.dot(np.swapaxes(normalised, 1, 2), self._mel_wts)
    foa_iv = projected.reshape((linear_spectra.shape[0], self._nb_mel_bins * 3))
    if np.isnan(foa_iv).any():
        print('Feature extraction is generating nan outputs')
        exit()
    return foa_iv

```

In [1]:
import librosa
import numpy as np

In [10]:
# Mel filterbank shared by the feature functions in this cell (sr=16000, n_fft=512,
# 40 bands). It is transposed so that spectra with frequency bins on their last
# axis can be right-multiplied by it.
mels_wts = librosa.filters.mel(sr=16000, n_fft=512, n_mels=40).T

def get_spectrogram(audio_input):
    """Compute a 512-point STFT for every channel and stack the results.

    Args:
        audio_input: 2-D array indexed as ``[sample, channel]``.

    Returns:
        The per-channel STFTs stacked into one array and fully transposed
        (channel axis last); for the ``(16000, 4)`` example in this notebook
        the printed shape is ``(126, 257, 4)``.
    """
    channel_count = audio_input.shape[1]
    per_channel = [
        librosa.core.stft(np.asfortranarray(audio_input[:, ch]), n_fft=512)
        for ch in range(channel_count)
    ]
    return np.array(per_channel).T

def get_mel_spectrogram(linear_spectra, mel_wts=None):
    """Convert linear spectra into flattened per-channel log-mel features.

    For every channel the squared magnitude is projected through the mel
    filterbank and converted to decibels with ``librosa.power_to_db``.

    Args:
        linear_spectra: 3-D array indexed as ``[frame, bin, channel]``.
        mel_wts: optional filterbank of shape ``(bins, n_mels)``. Defaults to
            the notebook-level ``mels_wts``. The number of mel bands is taken
            from this array instead of being hard-coded.

    Returns:
        2-D array of shape ``(frames, channels * n_mels)``; within a row the
        mel bands of channel 0 come first, followed by those of channel 1, and
        so on.
    """
    if mel_wts is None:
        # Fall back to the filterbank defined at notebook level.
        mel_wts = mels_wts
    n_frames = linear_spectra.shape[0]
    n_channels = linear_spectra.shape[-1]
    n_mels = mel_wts.shape[1]
    # Filled directly in (frame, channel, mel) order, so flattening the last
    # two axes yields the channel-major layout without a transpose.
    mel_feat = np.zeros((n_frames, n_channels, n_mels))
    for ch_cnt in range(n_channels):
        power_spectra = np.abs(linear_spectra[:, :, ch_cnt])**2
        mel_feat[:, ch_cnt, :] = librosa.power_to_db(np.dot(power_spectra, mel_wts))
    return mel_feat.reshape((n_frames, n_channels * n_mels))

def get_foa_intensity_vectors(linear_spectra, eps=1e-8, mel_wts=None):
    """Compute mel-projected, energy-normalised intensity vectors.

    Channel 0 (called W in the original code, presumably the omnidirectional
    FOA channel) is combined with the remaining channels: the real part of
    ``conj(W) * channel`` is divided by an energy term and projected through
    the mel filterbank.

    Args:
        linear_spectra: 3-D complex array indexed as ``[frame, bin, channel]``.
        eps: small constant added to the energy term to avoid division by zero.
        mel_wts: optional filterbank of shape ``(bins, n_mels)``. Defaults to
            the notebook-level ``mels_wts``. The output width is derived from
            this array and the channel count instead of a hard-coded ``40 * 3``.

    Returns:
        2-D array of shape ``(frames, (channels - 1) * n_mels)``; within a row
        the mel bands of the first non-W channel come first, then the next.
    """
    if mel_wts is None:
        # Fall back to the filterbank defined at notebook level.
        mel_wts = mels_wts

    W = linear_spectra[:, :, 0]
    directional = linear_spectra[:, :, 1:]

    I = np.real(np.conj(W)[:, :, np.newaxis] * directional)
    E = eps + (np.abs(W)**2 + ((np.abs(directional)**2).sum(-1))/3.0)

    I_norm = I / E[:, :, np.newaxis]
    # Move the bin axis last so it contracts with the filterbank; the product is
    # already laid out as (frame, channel, mel), so no further transpose is
    # needed before flattening.
    I_norm_mel = np.dot(np.transpose(I_norm, (0, 2, 1)), mel_wts)
    n_frames = linear_spectra.shape[0]
    n_directional = directional.shape[-1]
    return I_norm_mel.reshape((n_frames, n_directional * mel_wts.shape[1]))

In [11]:
raw = np.random.rand(16000,4)
print(raw.shape)

(16000, 4)


In [12]:
spec = get_spectrogram(raw)
print(spec.shape)

mels = get_mel_spectrogram(spec)
print(mels.shape)

iv = get_foa_intensity_vectors(spec)
print(iv.shape)

(126, 257, 4)
(126, 160)
(126, 120)


RNN을 쓰려면 시간 축(T)이 맨 앞에 와야 하니까 이런 식으로 구성한 듯하다. 다만 채널별 특징을 하나의 축으로 이어 붙여 두었다 (예: 4채널 × 40 mel = 160, 3채널 × 40 mel = 120).