In [None]:
%matplotlib inline
from tifresi import pyplot as plt
import numpy as np

# Select a test signal


In [None]:
import librosa

# Path of the 'brahms' example recording provided by librosa.
example_key = 'brahms'
filename = librosa.util.example(example_key)

# Load the signal

In [None]:
from tifresi.utils import load_signal, preprocess_signal

# Read the audio file: `y` holds the samples, `sr` the sampling rate.
y, sr = load_signal(filename)
# Apply some light preprocessing
y = preprocess_signal(y)

# Plot the beginning of the signal in the time domain.
# The excerpt length is clamped to the signal length so that `t` and `y[:n]`
# always have the same number of points (a short signal would otherwise make
# `plt.plot` fail with a shape mismatch).
n = min(256*256, len(y))
t = np.arange(n)/sr*1000  # sample index -> time in milliseconds
plt.figure(figsize=(10, 2))
plt.plot(t, y[:n])
plt.xlabel('Time [ms]')
plt.ylabel('Amplitude')
plt.xlim(0, np.max(t));  # trailing ';' hides the returned limits tuple

In [None]:
from IPython.display import Audio, display

# Embed an audio player for the preprocessed signal at its sampling rate.
audio_player = Audio(y, rate=sr)
display(audio_player)

# Define the main parameters
* The number of frequency channels for the STFT: `stft_channels`
* The hop size in time (in samples): `hop_size`
* The number of mel bins in frequency: `n_mels`

In [None]:
from tifresi.hparams import HParams

# The default parameters are used here; sticking to them is recommended.
# Defaults: stft_channels = 1024, hop_size = 256, n_mels = 80
stft_channels, hop_size, n_mels = HParams.stft_channels, HParams.hop_size, HParams.n_mels
# Defaults: fmin = 0, fmax = None
fmin, fmax = HParams.fmin, HParams.fmax
mel_basis = HParams.mel_basis

# Build the time frequency system
The system will be used for the following operations:
* Compute the STFT
* Compute the spectrogram
* Compute the mel spectrogram

In [None]:
from tifresi.stft import GaussTF

# Time-frequency system configured with the number of STFT channels and the
# hop size selected above.
stft_system = GaussTF(
    stft_channels=stft_channels,
    hop_size=hop_size,
)

# Compute the mel spectrogram

In [None]:
# Compute the spectrogram of the signal `y` with the time-frequency system.
# NOTE(review): presumably a 2-D (frequency, time) array, since it is later
# multiplied by the mel basis and displayed with imshow — confirm.
Y = stft_system.spectrogram(y)

In [None]:
from tifresi.transforms import log_spectrogram

# The log spectrogram is used because our perception of sound energy is logarithmic.
dynamic_range_dB = 50
logSpectrogram = log_spectrogram(Y, dynamic_range_dB=dynamic_range_dB)

# Time spanned by all spectrogram frames, in seconds.
tmax = logSpectrogram.shape[1]/sr * hop_size

plt.figure(figsize=(10, 2))
plt.imshow(
    logSpectrogram,
    cmap="afmhot_r",
    origin="lower",
    aspect="auto",
    extent=[0, tmax, 0, sr/2/1000],  # x: 0..tmax seconds, y: 0..sr/2 in kHz
)
plt.title('Log spectrogram')
plt.xlabel('Time [s]')
plt.ylabel('Frequency [kHz]')
plt.xlim(0, tmax)
plt.colorbar()


In [None]:
# Build the mel filter bank from the sampling rate of the loaded signal.
# This rebinds `mel_basis`, replacing the value read from HParams earlier.
mel_basis = librosa.filters.mel(
    sr=sr,
    n_fft=stft_channels,
    n_mels=n_mels,
    fmin=fmin,
    fmax=fmax,
)


In [None]:
from tifresi.transforms import log_mel_spectrogram, log_spectrogram

# The log mel spectrogram is used because our perception of sound energy is logarithmic.
log_Y = log_mel_spectrogram(Y, stft_channels, n_mels)

# The same result obtained by hand: project the spectrogram onto the mel
# basis, then take the log spectrogram of the projection.
mel_Y = mel_basis.dot(Y)
log_Y2 = log_spectrogram(mel_Y)

# Both routes must agree up to a small absolute tolerance.
np.testing.assert_allclose(log_Y, log_Y2, atol=1e-6)

In [None]:
# from tifresi.hparams import HParams as p

# mel_basis = librosa.filters.mel(sr=sr, n_fft=stft_channels, n_mels=n_mels, fmin=p.fmin, fmax=p.fmax)
# plt.imshow(mel_basis, aspect='auto', origin='lower')

In [None]:
# Show only the frames covering the first `n` samples (the same excerpt as the
# time-domain plot), never more frames than the spectrogram actually has.
n_frames_shown = min(n//hop_size, log_Y.shape[1])
# Duration of the displayed excerpt in seconds. The time axis must describe the
# displayed slice, not the whole signal, otherwise the excerpt is stretched
# over the full-signal duration and the tick labels are wrong.
t_shown = n_frames_shown * hop_size / sr

plt.figure(dpi=200, figsize=(10,2))
plt.imshow(
    log_Y[:, :n_frames_shown],
    cmap="afmhot_r",
    origin="lower",
    aspect="auto",
    extent=[0, t_shown, 0, sr/2/1000],
)
plt.title('Log mel spectrogram')
plt.xlabel('Time [s]')
plt.xlim(0, t_shown)
# No y axis: the rows are mel bins, so a 'Frequency [kHz]' label with linear
# ticks would be misleading.
plt.yticks([])
plt.colorbar();  # trailing ';' hides the colorbar object repr