## Convert a given frequency range to mel filter banks
Reference: http://practicalcryptography.com/miscellaneous/machine-learning/guide-mel-frequency-cepstral-coefficients-mfccs

In [9]:
import numpy as np

def freq_to_mel(freq_val):
    """Map a frequency in Hz onto the mel scale.

    Evaluates ``1125 * ln(1 + f / 700)``. The logarithm is taken with
    ``np.log``, so ``freq_val`` may be a scalar or a NumPy array; arrays
    are converted element-wise.
    """
    ratio = freq_val / 700
    return 1125 * np.log(1 + ratio)
    
def mel_to_freq(mel_val):
    """Map a mel-scale value back to a frequency in Hz.

    Evaluates ``700 * (exp(m / 1125) - 1)``, the inverse of the
    ``1125 * ln(1 + f / 700)`` mapping. The exponential is taken with
    ``np.exp``, so ``mel_val`` may be a scalar or a NumPy array; arrays
    are converted element-wise.
    """
    growth = np.exp(mel_val / 1125)
    return 700 * (growth - 1)

In [40]:
# Band edges in Hz and the number of triangular mel filters to lay out.
fmin = 20
fmax = 4000
n_mels = 40

# Express both band edges on the mel scale.
fmin_in_mel, fmax_in_mel = freq_to_mel(fmin), freq_to_mel(fmax)

# Each triangular filter runs from the point before its centre to the point
# after it, so n_mels filters need n_mels + 2 points. The points are evenly
# spaced in mel and then mapped back to Hz.
filterbanks_pts_mel = np.linspace(fmin_in_mel, fmax_in_mel, num=n_mels + 2)
filterbank_pts_freq = mel_to_freq(filterbanks_pts_mel)

# Print the Hz range of every filter: filter k spans point k to point k + 2,
# so pair the point list with itself shifted by two positions.
band_edges = zip(filterbank_pts_freq[:-2], filterbank_pts_freq[2:])
for band_no, (start_freq, end_freq) in enumerate(band_edges, start=1):
    print('Mel {}: {} - {}Hz'.format(band_no, np.round(start_freq), np.round(end_freq)))

Mel 1: 20.0 - 89.0Hz
Mel 2: 54.0 - 126.0Hz
Mel 3: 89.0 - 165.0Hz
Mel 4: 126.0 - 205.0Hz
Mel 5: 165.0 - 247.0Hz
Mel 6: 205.0 - 292.0Hz
Mel 7: 247.0 - 338.0Hz
Mel 8: 292.0 - 387.0Hz
Mel 9: 338.0 - 438.0Hz
Mel 10: 387.0 - 491.0Hz
Mel 11: 438.0 - 547.0Hz
Mel 12: 491.0 - 605.0Hz
Mel 13: 547.0 - 666.0Hz
Mel 14: 605.0 - 730.0Hz
Mel 15: 666.0 - 797.0Hz
Mel 16: 730.0 - 867.0Hz
Mel 17: 797.0 - 941.0Hz
Mel 18: 867.0 - 1018.0Hz
Mel 19: 941.0 - 1098.0Hz
Mel 20: 1018.0 - 1182.0Hz
Mel 21: 1098.0 - 1270.0Hz
Mel 22: 1182.0 - 1363.0Hz
Mel 23: 1270.0 - 1459.0Hz
Mel 24: 1363.0 - 1560.0Hz
Mel 25: 1459.0 - 1666.0Hz
Mel 26: 1560.0 - 1777.0Hz
Mel 27: 1666.0 - 1893.0Hz
Mel 28: 1777.0 - 2014.0Hz
Mel 29: 1893.0 - 2141.0Hz
Mel 30: 2014.0 - 2274.0Hz
Mel 31: 2141.0 - 2413.0Hz
Mel 32: 2274.0 - 2559.0Hz
Mel 33: 2413.0 - 2712.0Hz
Mel 34: 2559.0 - 2872.0Hz
Mel 35: 2712.0 - 3039.0Hz
Mel 36: 2872.0 - 3214.0Hz
Mel 37: 3039.0 - 3397.0Hz
Mel 38: 3214.0 - 3589.0Hz
Mel 39: 3397.0 - 3790.0Hz
Mel 40: 3589.0 - 4000.0Hz


## Calculate number of frames

In [34]:
# The number of frames for a given window size and hop length follows the
# output-size formula of a strided convolution:
#     floor((length - window + 2 * padding) / stride) + 1
def get_no_frames(signal_len_ms, frame_size_ms, sampl_freq, hop_len_ms, padding_ms):
    """Return how many whole frames fit in a padded signal.

    Args:
        signal_len_ms: Length of the signal, in milliseconds.
        frame_size_ms: Length of one analysis frame (window), in milliseconds.
        sampl_freq: Sampling frequency, in samples per second.
        hop_len_ms: Step between the starts of consecutive frames, in
            milliseconds.
        padding_ms: Padding added to EACH side of the signal, in milliseconds.

    Returns:
        The frame count as a float holding a whole number (e.g. ``101.0``).
        A trailing partial frame is not counted. The value can be zero or
        negative when the padded signal is shorter than one frame.
    """
    def _to_samples(duration_ms):
        # Convert a duration in milliseconds to a (possibly fractional)
        # number of samples.
        return duration_ms * sampl_freq / 1000.0

    signal_len_in_samples = _to_samples(signal_len_ms)
    frame_size_in_samples = _to_samples(frame_size_ms)
    hop_len_in_samples = _to_samples(hop_len_ms)
    padding_in_samples = _to_samples(padding_ms)

    # Span over which the start of a frame can slide.
    slide_span = signal_len_in_samples - frame_size_in_samples + 2 * padding_in_samples

    # Floor division drops an incomplete trailing hop; plain division would
    # report a fractional number of frames whenever the span is not an
    # exact multiple of the hop length.
    n_frames = slide_span // hop_len_in_samples + 1
    return n_frames

In [39]:
# Example: a 1000 ms signal split into 30 ms frames taken every 10 ms.
s_len = 1000   # signal length, ms
f_len = 30     # frame size, ms
hop_len = 10   # hop length, ms
sr = 8000      # sampling frequency, samples per second
p = 15         # padding per side, ms; note that librosa adds padding by default

print(
    get_no_frames(
        signal_len_ms=s_len,
        frame_size_ms=f_len,
        sampl_freq=sr,
        hop_len_ms=hop_len,
        padding_ms=p,
    )
)


101.0
