In [10]:
"""
This module provides an abstract interface for all transforms (Transform)
and an enumeration of all available transforms (TransformType).
TransformType should be updated as new transforms are added.
"""

import logging
from abc import ABC, abstractmethod
from enum import Enum







class TransformType(Enum):
    """Enumeration of all available transforms.

    Each member's value is the lowercase string form of the member name.
    Add a new member here whenever a new transform is implemented.
    """

    LOG = "log"
    MAGNITUDESPECTROGRAM = "magnitudespectrogram"
    MELSPECTROGRAM = "melspectrogram"
    MINMAXSCALER = "minmaxscaler"
    MFCC = "mfcc"
    POWERSPECTROGRAM = "powerspectrogram"
    STFT = "stft"
    ROWSTANDARDISER = "rowstandardiser"
    STANDARDISER = "standardiser"


class Transform(ABC):
    """Abstract base class shared by every transform object.

    A transform manipulates a signal (e.g., applies log scaling or extracts
    MFCCs). Concrete subclasses must implement ``process``.

    Attributes:
        - name: TransformType member identifying the transform
    """

    def __init__(self, name: TransformType):
        # Kept on the instance so _prepend_transform_name can read it.
        self.name = name

    @abstractmethod
    def process(self, signal):
        """Apply the transform to the incoming signal.

        Must be overridden by every concrete transform.

        :param signal: Signal object to be manipulated
        :return: Signal object with transformed values
        """

    def _prepend_transform_name(self, string):
        """Prefix ``string`` with this transform's name value and "_".

        :param string: Text to be prefixed
        :return: ``"<name value>_<string>"``
        """
        prefix = self.name.value + "_"
        return prefix + string




import librosa






class MelSpectrogram(Transform):
    """This class extracts a Mel spectrogram from a signal.

    It's a concrete Transform. librosa facilities are used to extract Mel
    spectrograms.

    Attributes:
        - num_mels: Number of mel bands
        - min_freq: Lowest frequency in Hertz. Frequencies below this
            threshold are filtered out
        - max_freq: Highest frequency in Hertz. Frequencies above this
            threshold are filtered out
        - frame_length: Length of the windowed signal after padding with zeros
        - hop_length: Number of audio samples between adjacent STFT columns
        - win_length: Each frame of audio is windowed by window of length
            win_length and then padded with zeros to match frame_length
        - window: Windowing method employed for STFT. Default is 'hann'
    """

    def __init__(self,
                 num_mels: int = 64,
                 min_freq: int = 0,
                 max_freq: int = 8000,
                 frame_length: int = 2048,
                 hop_length: int = 1024,
                 win_length: int = 2048,
                 window: str = "hann"):
        # The base class only needs the identifier of this transform.
        super().__init__(TransformType.MELSPECTROGRAM)
        self.num_mels = num_mels
        self.min_freq = min_freq
        self.max_freq = max_freq
        self.frame_length = frame_length
        self.hop_length = hop_length
        self.win_length = win_length
        self.window = window

    def process(self, signal):
        """Extract Mel Spectrogram and modify signal.

        The signal is modified in place: its name gets the transform name
        prepended and its data is replaced by the Mel spectrogram.

        :param signal: Signal object exposing ``name``, ``data`` and
            ``sample_rate``.
        :return: Modified signal (the same object that was passed in)
        """
        signal.name = self._prepend_transform_name(signal.name)
        signal.data = librosa.feature.melspectrogram(
            # Passed by keyword: recent librosa releases reject a
            # positional audio argument.
            y=signal.data,
            sr=signal.sample_rate,
            n_mels=self.num_mels,
            n_fft=self.frame_length,
            hop_length=self.hop_length,
            win_length=self.win_length,
            window=self.window,
            # Forward the configured band limits to the mel filter bank;
            # previously they were stored but never applied.
            fmin=self.min_freq,
            fmax=self.max_freq)

        return signal

# Map a string key to a transform class, look the class up by that key and
# instantiate it with its default parameters.
d = {"mel_spec": MelSpectrogram}
transform = d["mel_spec"]
transform()

<__main__.MelSpectrogram at 0x7f04c99e0198>

In [17]:
# Show the string value stored on the MELSPECTROGRAM member.
TransformType.MELSPECTROGRAM.value


'melspectrogram'

In [9]:
# Build a MelSpectrogram with default parameters and inspect the name that
# its __init__ hands to the Transform base class.
tr = MelSpectrogram()
tr.name

<TransformType.MELSPECTROGRAM: 'melspectrogram'>