# [Mel-Frequency Cepstral Coefficients](https://ratsgo.github.io/speechbook/docs/fe/mfcc#filter-banks)

# torch.mel
+ Target call (parameter values are listed below the snippet):
```python
# Mel front-end configuration; the values used for n_fft, win_length,
# hop_length and n_mels are listed below.
torchaudio.transforms.MelSpectrogram(
    sample_rate=self.sample_rate,
    n_fft=self.n_fft,
    win_length=self.win_length,
    hop_length=self.hop_length,
    n_mels=self.n_mels,
    f_min=20,
    f_max=7600,
    window_fn=torch.hamming_window,
)
```

+ n_fft = 512
+ win_length = 400
+ hop_length = 160
+ n_mels = 80


---

```python
class MelSpectrogram(torch.nn.Module):
    """Chain a ``Spectrogram`` stage and a ``MelScale`` stage into one module.

    The constructor resolves the window/hop defaults, stores the scalar
    settings as attributes, and builds the two sub-modules. ``forward`` feeds
    the waveform through ``self.spectrogram`` and then ``self.mel_scale``.

    Args:
        sample_rate: Stored and passed to ``MelScale``.
        n_fft: Passed to ``Spectrogram``; ``MelScale`` receives
            ``n_fft // 2 + 1`` as its fifth positional argument.
        win_length: Passed to ``Spectrogram``. Falls back to ``n_fft`` when
            ``None``.
        hop_length: Passed to ``Spectrogram``. Falls back to half of the
            resolved ``win_length`` (integer division) when ``None``.
        f_min: Stored and passed to ``MelScale``.
        f_max: Stored and passed to ``MelScale``.
        pad: Passed to ``Spectrogram``.
        n_mels: Number of mel frequency bins; passed to ``MelScale``.
        window_fn: Passed to ``Spectrogram``.
        power: Passed to ``Spectrogram``.
        normalized: Passed to ``Spectrogram``.
        wkwargs: Passed to ``Spectrogram``.
        center: Passed to ``Spectrogram``.
        pad_mode: Passed to ``Spectrogram``.
        onesided: Deprecated. Any non-``None`` value only triggers a warning;
            ``Spectrogram`` is always built with ``onesided=True``.
        norm: Passed to ``MelScale``.
        mel_scale: Passed to ``MelScale``.
    """

    __constants__ = ["sample_rate", "n_fft", "win_length", "hop_length", "pad", "n_mels", "f_min"]

    def __init__(
        self,
        sample_rate: int = 16000,
        n_fft: int = 400,
        win_length: Optional[int] = None,
        hop_length: Optional[int] = None,
        f_min: float = 0.0,
        f_max: Optional[float] = None,
        pad: int = 0,
        n_mels: int = 128,
        window_fn: Callable[..., Tensor] = torch.hann_window,
        power: float = 2.0,
        normalized: bool = False,
        wkwargs: Optional[dict] = None,
        center: bool = True,
        pad_mode: str = "reflect",
        onesided: Optional[bool] = None,
        norm: Optional[str] = None,
        mel_scale: str = "htk",
    ) -> None:
        super().__init__()
        torch._C._log_api_usage_once("torchaudio.transforms.MelSpectrogram")

        # The argument is accepted only for backward compatibility.
        if onesided is not None:
            warnings.warn(
                "Argument 'onesided' has been deprecated and has no influence on the behavior of this module."
            )

        # Resolve the optional window/hop sizes before storing anything.
        if win_length is None:
            win_length = n_fft
        if hop_length is None:
            hop_length = win_length // 2

        self.sample_rate = sample_rate
        self.n_fft = n_fft
        self.win_length = win_length
        self.hop_length = hop_length
        self.pad = pad
        self.power = power
        self.normalized = normalized
        self.n_mels = n_mels  # number of mel frequency bins
        self.f_max = f_max
        self.f_min = f_min

        # Sub-modules are registered in the same order as before:
        # spectrogram first, mel_scale second.
        self.spectrogram = Spectrogram(
            n_fft=n_fft,
            win_length=win_length,
            hop_length=hop_length,
            pad=pad,
            window_fn=window_fn,
            power=power,
            normalized=normalized,
            wkwargs=wkwargs,
            center=center,
            pad_mode=pad_mode,
            onesided=True,
        )
        n_stft = n_fft // 2 + 1
        self.mel_scale = MelScale(n_mels, sample_rate, f_min, f_max, n_stft, norm, mel_scale)

    def forward(self, waveform: Tensor) -> Tensor:
        """Return ``self.mel_scale(self.spectrogram(waveform))``."""
        return self.mel_scale(self.spectrogram(waveform))


```




# Previous implementation

In [6]:
# Build `fbank`: one row per mel filter, one column per FFT bin index
# (NFFT // 2 + 1 columns). Each row rises linearly from its left edge bin to
# its centre bin and falls linearly to its right edge bin.
nfilt = 40
low_freq_mel = 0
sample_rate = 16000
NFFT = 512

# Upper band edge: sample_rate / 2 converted from Hz to mel.
high_freq_mel = 2595 * np.log10(1 + (sample_rate / 2) / 700)
# nfilt filters need nfilt + 2 edge points, equally spaced on the mel axis.
mel_points = np.linspace(low_freq_mel, high_freq_mel, nfilt + 2)
# Edge points converted back from mel to Hz.
hz_points = 700 * (10 ** (mel_points / 2595) - 1)
# Edge points as (floored) FFT bin indices, still stored as floats.
bin = np.floor((NFFT + 1) * hz_points / sample_rate)

fbank = np.zeros((nfilt, NFFT // 2 + 1))
for m in range(1, nfilt + 1):
    left_edge, peak, right_edge = bin[m - 1], bin[m], bin[m + 1]
    lo, mid, hi = int(left_edge), int(peak), int(right_edge)
    row = fbank[m - 1]  # view into fbank, so writes land in the bank
    # Rising slope over bins [lo, mid).
    row[lo:mid] = (np.arange(lo, mid) - left_edge) / (peak - left_edge)
    # Falling slope over bins [mid, hi).
    row[mid:hi] = (right_edge - np.arange(mid, hi)) / (right_edge - peak)

In [9]:
display(mel_points)

array([   0.        ,   69.2688548 ,  138.5377096 ,  207.80656439,
        277.07541919,  346.34427399,  415.61312879,  484.88198358,
        554.15083838,  623.41969318,  692.68854798,  761.95740278,
        831.22625757,  900.49511237,  969.76396717, 1039.03282197,
       1108.30167676, 1177.57053156, 1246.83938636, 1316.10824116,
       1385.37709596, 1454.64595075, 1523.91480555, 1593.18366035,
       1662.45251515, 1731.72136994, 1800.99022474, 1870.25907954,
       1939.52793434, 2008.79678914, 2078.06564393, 2147.33449873,
       2216.60335353, 2285.87220833, 2355.14106312, 2424.40991792,
       2493.67877272, 2562.94762752, 2632.21648232, 2701.48533711,
       2770.75419191, 2840.02304671])

In [10]:
display(hz_points)

array([   0.        ,   44.37407701,   91.56109503,  141.73937073,
        195.09852453,  251.84019719,  312.17881177,  376.34238398,
        444.57338374,  517.12965156,  594.28537283,  676.33211398,
        763.57992429,  856.35850754,  955.01846792, 1059.93263499,
       1171.49747253, 1290.13457677, 1416.29226959, 1550.44729265,
       1693.10660904, 1844.80931898, 2006.12869712, 2177.67435884,
       2360.094564  , 2554.07866662, 2760.35971998, 2979.71724669,
       3212.98018452, 3461.03001887, 3724.80411386, 4005.29925458,
       4303.57541383, 4620.75975774, 4958.05090523, 5316.72345751,
       5698.13281472, 6103.72029792, 6535.01859571, 6993.65755619,
       7481.37034603, 8000.        ])

In [7]:
display(bin)

array([  0.,   1.,   2.,   4.,   6.,   8.,  10.,  12.,  14.,  16.,  19.,
        21.,  24.,  27.,  30.,  33.,  37.,  41.,  45.,  49.,  54.,  59.,
        64.,  69.,  75.,  81.,  88.,  95., 103., 110., 119., 128., 137.,
       148., 158., 170., 182., 195., 209., 224., 239., 256.])

In [8]:
display(fbank)

array([[0.        , 1.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 1.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.11764706, 0.05882353,
        0.        ]])