In [7]:
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt

In [23]:

class LogMelSpectrogram(tf.keras.layers.Layer):
    """Compute log-magnitude mel-scaled spectrograms."""

    def __init__(self, sample_rate, n_fft, n_mels,signal_lenght,
                 f_min=0.0, f_max=16000, **kwargs):
        super(LogMelSpectrogram, self).__init__(**kwargs)
        self.signal_lenght = signal_lenght
        self.sample_rate = sample_rate
        self.n_fft = n_fft
        self.n_mels = n_mels
        self.f_min = f_min
        self.f_max = f_max if f_max else sample_rate / 2
        self.hop_length = int(
                self.signal_lenght * self.sample_rate / (self.n_mels-1)
            )
        self.mel_filterbank = tf.signal.linear_to_mel_weight_matrix(
            num_mel_bins=self.n_mels,
            num_spectrogram_bins=n_fft // 2 + 1,
            sample_rate=self.sample_rate,
            lower_edge_hertz=self.f_min,
            upper_edge_hertz=self.f_max)

    def build(self, input_shape):
        self.non_trainable_weights.append(self.mel_filterbank)
        super(LogMelSpectrogram, self).build(input_shape)

    def call(self, waveforms):
        """Forward pass.
        Parameters
        ----------
        waveforms : tf.Tensor, shape = (None, n_samples)
            A Batch of mono waveforms.
        Returns
        -------
        log_mel_spectrograms : (tf.Tensor), shape = (None, time, freq, ch)
            The corresponding batch of log-mel-spectrograms
        """
        def _tf_log10(x):
            numerator = tf.math.log(x)
            denominator = tf.math.log(tf.constant(10, dtype=numerator.dtype))
            return numerator / denominator

        def power_to_db(magnitude, amin=1e-16, top_db=80.0):
            """
            https://librosa.github.io/librosa/generated/librosa.core.power_to_db.html
            """
            ref_value = tf.reduce_max(magnitude)
            log_spec = 10.0 * _tf_log10(tf.maximum(amin, magnitude))
            log_spec -= 10.0 * _tf_log10(tf.maximum(amin, ref_value))
            log_spec = tf.maximum(log_spec, tf.reduce_max(log_spec) - top_db)

            return log_spec

        spectrograms = tf.signal.stft(waveforms,
                                      frame_length=self.n_fft,
                                      frame_step=self.hop_length,
                                      pad_end=False)

        magnitude_spectrograms = tf.abs(spectrograms)

        mel_spectrograms = tf.matmul(tf.square(magnitude_spectrograms),
                                     self.mel_filterbank)

        log_mel_spectrograms = power_to_db(mel_spectrograms)

        # add channel dimension
        log_mel_spectrograms = tf.expand_dims(log_mel_spectrograms, 3)

        return log_mel_spectrograms

    def get_config(self):
        config = {
            'n_fft': self.n_fft,
            'hop_length': self.hop_length,
            'n_mels': self.n_mels,
            'sample_rate': self.sample_rate,
            'f_min': self.f_min,
            'f_max': self.f_max,
        }
        config.update(super(LogMelSpectrogram, self).get_config())

        return config


In [None]:
wave = np.random.randn((WAVE_LEN_SAMPLES)).astype(WAVE_DTYPE)
waves = np.repeat(wave[np.newaxis, ...], 16, axis=0)
i_wave = x = keras.layers.Input(shape=WAVE_LEN_SAMPLES, dtype=WAVE_DTYPE)
x = MelSpectrogram(
    sample_rate=AUDIO_SR,
    fft_size=N_FFT,
    n_mels=N_MELS,
    hop_length=WAVE_LEN_SAMPLES // (N_TIMESTEPS - 1),
    power=POWER,
)(x)
o_float = x = PowerToDB()(x)
o_rgb = x = Float2DToRGB()(x)
m = keras.models.Model(inputs=[i_wave], outputs=[o_float, o_rgb])
msgs_f, msgs_rgb = m.predict(waves)
plt.imshow(msgs_f[0])
plt.figure()
plt.imshow(msgs_rgb[0])

In [None]:
def ConvModel(n_classes, sample_rate=16000, signal_lenght=5,
              fft_size=2024, n_mels=224):
    n_samples = sample_rate * signal_lenght
    x = Input(shape=(n_samples,), name='input', dtype='float32')
    y = LogMelSpectrogram(sample_rate, fft_size, hop_size, n_mels)(x)

In [30]:
wave = np.random.randn((160000)).astype('float16')
waves = np.repeat(wave[np.newaxis, ...], 16, axis=0)
x = tf.keras.layers.Input(shape=(16,160000), dtype='float16')
y = LogMelSpectrogram(sample_rate=32000, n_fft=2024, n_mels=224, signal_lenght=5)(x)
m = keras.models.Model(inputs=x, outputs=y)
msgs_rgb = m.predict(waves)




To change all layers to have dtype float16 by default, call `tf.keras.backend.set_floatx('float16')`. To change just this layer, pass dtype='float16' to the layer constructor. If you are the author of this layer, you can disable autocasting by passing autocast=False to the base Layer constructor.



ValueError: in user code:

    <ipython-input-20-413bd84213ec>:61 call  *
        mel_spectrograms = tf.matmul(tf.square(magnitude_spectrograms),
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/util/dispatch.py:201 wrapper  **
        return target(*args, **kwargs)
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/ops/math_ops.py:3215 matmul
        return gen_math_ops.batch_mat_mul_v2(
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/ops/gen_math_ops.py:1561 batch_mat_mul_v2
        _, _, _op, _outputs = _op_def_library._apply_op_helper(
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/op_def_library.py:742 _apply_op_helper
        op = g._create_op_internal(op_type_name, inputs, dtypes=None,
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/func_graph.py:591 _create_op_internal
        return super(FuncGraph, self)._create_op_internal(  # pylint: disable=protected-access
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py:3477 _create_op_internal
        ret = Operation(
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py:1974 __init__
        self._c_op = _create_c_op(self._graph, node_def, inputs,
    /usr/local/lib/python3.8/dist-packages/tensorflow/python/framework/ops.py:1815 _create_c_op
        raise ValueError(str(e))

    ValueError: Dimensions must be equal, but are 1025 and 1013 for '{{node log_mel_spectrogram_7/MatMul}} = BatchMatMulV2[T=DT_FLOAT, adj_x=false, adj_y=false](log_mel_spectrogram_7/Square, log_mel_spectrogram_7/MatMul/b)' with input shapes: [?,16,221,1025], [1013,224].
