In [None]:
import importlib

import librosa
from librosa.display import waveshow
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow_io as tfio
import tensorflow as tf

import src.preprocess
importlib.reload(src.preprocess)

# Sampling rate (samples per second) used throughout the notebook for slicing
# the waveform and for labelling the time/frequency axes of the plots.
SR = 32000
# Root URI under which the example recording is looked up.
DATA_ROOT = "gs://bird-clef-kimmo/data"
# Length of the excerpt kept from the recording.
SAMPLE_LENGTH = 5 # seconds

In [None]:
# Open one example recording as an AudioIOTensor and print its summary.
sample_uri = f'{DATA_ROOT}/train_short_audio/mallar3/XC104614.ogg'
audio = tfio.audio.AudioIOTensor(sample_uri)
print(audio)

In [None]:
# Squeeze out axis 1 (presumably the channel axis of a mono clip -- TODO
# confirm) and keep at most the first SAMPLE_LENGTH seconds of samples.
n_keep = SR * SAMPLE_LENGTH
waveform = tf.squeeze(audio.to_tensor(), axis=1)
audio_tensor = waveform[:n_keep]
print(audio_tensor)

### Plot waveform

In [None]:
# Draw the same excerpt twice on a shared time axis: once with plain
# matplotlib and once with librosa's waveshow helper.
fig, ax = plt.subplots(nrows=2, sharex=True)

# With pyplot
audio_np = audio_tensor.numpy()
# Timestamp every sample from its index and the sampling rate. Stretching a
# fixed [0, SAMPLE_LENGTH] interval over the samples would mislabel the axis
# (and misalign it with the waveshow panel below) whenever the recording is
# shorter than SAMPLE_LENGTH seconds.
ts = np.arange(len(audio_np)) / SR
ax[0].plot(ts, audio_np)
ax[0].set(title="matplotlib", ylabel="Amplitude")

# With librosa.display.waveshow
waveshow(audio_np, sr=SR, x_axis='time', label="Waveform", ax=ax[1])
ax[1].set(title="librosa.display.waveshow", ylabel="Amplitude")

plt.show()

In [None]:
from IPython.display import Audio

# Embed a player for the excerpt, using the sample rate reported by the file.
playback_rate = audio.rate.numpy()
Audio(audio_np, rate=playback_rate)

### Spectrogram through `tfio.audio`

In [None]:
# STFT settings, all expressed in samples.
NFFT = 1024
WINDOW = 1024
STRIDE = 512 # Hop length
spectrogram = tfio.audio.spectrogram(
    audio_tensor, nfft=NFFT, window=WINDOW, stride=STRIDE)

# Expected output shape. tfio's spectrogram pads the end of the signal, so a
# trailing partial hop still produces a frame: the frame count is
# ceil(samples / stride) rather than floor. A real-input FFT of size NFFT
# yields NFFT // 2 + 1 frequency bins.
NT = -(-len(audio_np) // STRIDE)
NF = NFFT // 2 + 1
print("Shape", spectrogram.shape, "Expected shape", (NT, NF))

# Actual duration of the excerpt; used for the time extent of both images so
# the axis stays correct for recordings shorter than SAMPLE_LENGTH seconds.
clip_seconds = len(audio_np) / SR

# Per-frame start times (seconds) and frequency-bin indices.
ts = np.arange(NT) * STRIDE / SR
fs = np.arange(NF)

fig, ax = plt.subplots(nrows=2, ncols=1, sharex=True, figsize=(12, 6))

# --- Top panel: linear-frequency spectrogram on a dB scale -----------------
top_db = 80
spectrogram_db = tfio.audio.dbscale(spectrogram, top_db=top_db)
# Transpose so that time runs along x and frequency along y.
spectrogram_db_np = spectrogram_db.numpy().T
img_linear = ax[0].imshow(spectrogram_db_np, extent=[0, clip_seconds, NF, 0])
ax[0].set_aspect("auto")
ax[0].set(title='Linear frequency scale', ylabel='Frequency bin')
ax[0].label_outer()
# Each panel gets its own colorbar: the two dB images are scaled
# independently, so a single shared bar would mislabel one of them.
fig.colorbar(img_linear, ax=ax[0], format="%+2.f dB")

# --- Bottom panel: mel-frequency spectrogram on a dB scale -----------------
mels = 128
mel_spectrogram = tfio.audio.melscale(
    spectrogram, rate=SR, mels=mels, fmin=0, fmax=SR/4)

dbscale_mel_spectrogram = tfio.audio.dbscale(
    mel_spectrogram, top_db=top_db)

mel_spectrogram_db_np = dbscale_mel_spectrogram.numpy().T
img = ax[1].imshow(mel_spectrogram_db_np, extent=[0, clip_seconds, mels, 0])
ax[1].set_aspect("auto")
ax[1].set(title='Mel frequency scale', xlabel='Time (s)', ylabel='Mel bin')
fig.colorbar(img, ax=ax[1], format="%+2.f dB")

plt.show()

### Spectrogram through `librosa`

In [None]:
# Three librosa renderings of the same excerpt, stacked on a shared time axis.

# Parameters for the rows below.
hop_length = 1024   # hop used for the log-frequency STFT (row 1)
n_mels = 128
fmax=SR / 2
NFFT = 1024         # FFT size for the mel spectrogram (row 2)
STRIDE = 512        # hop for the mel spectrogram (row 2)

# Keyword arguments shared by every specshow call.
time_axis = dict(sr=SR, x_axis='time')

fig, ax = plt.subplots(nrows=3, ncols=1, sharex=True, figsize=(12, 6))

# Row 0: magnitude STFT with librosa's default settings, linear frequency axis.
magnitude = np.abs(librosa.stft(audio_np))
D = librosa.amplitude_to_db(magnitude, ref=np.max)
img = librosa.display.specshow(D, y_axis='linear', ax=ax[0], **time_axis)
ax[0].set(title='Linear-frequency power spectrogram')
ax[0].label_outer()

# Row 1: STFT recomputed with the longer hop, drawn on a log frequency axis.
magnitude = np.abs(librosa.stft(audio_np, hop_length=hop_length))
D = librosa.amplitude_to_db(magnitude, ref=np.max)
librosa.display.specshow(
    D, y_axis='log', hop_length=hop_length, ax=ax[1], **time_axis)
ax[1].set(title='Log-frequency power spectrogram')
ax[1].label_outer()

# One colorbar for the whole column, keyed to the row-0 image.
fig.colorbar(img, ax=ax, format="%+2.f dB")

# Row 2: mel spectrogram converted to dB relative to its own maximum.
mel_kwargs = dict(sr=SR, n_fft=NFFT, hop_length=STRIDE, n_mels=n_mels, fmax=fmax)
M = librosa.feature.melspectrogram(y=audio_np, **mel_kwargs)
M_db = librosa.power_to_db(M, ref=np.max)
librosa.display.specshow(
    M_db, y_axis='mel', hop_length=STRIDE, fmax=fmax, ax=ax[2], **time_axis)
ax[2].set(title='Mel-frequency power spectrogram')

print("Shape of mel-scale spectrogram:", np.shape(M_db))

### Frequency masking

In [None]:
# Apply tfio's frequency masking to the dB-scaled mel spectrogram.
# NOTE(review): `param` is presumably the maximum width of the masked band,
# in bins -- confirm against the tfio.audio.freq_mask documentation.
freq_mask = tfio.audio.freq_mask(dbscale_mel_spectrogram, param=20)

# The masked array has mel bins on its last axis, not the linear FFT bins
# counted by NF, so take the y-extent from the array itself.
n_freq_bins = int(freq_mask.shape[-1])

fig, ax = plt.subplots(figsize=(6,6))
# Transpose so that time runs along x and mel bins along y.
ax.imshow(freq_mask.numpy().T, extent=[0, SAMPLE_LENGTH, n_freq_bins, 0])
ax.set(title='Frequency-masked mel spectrogram',
       xlabel='Time (s)', ylabel='Mel bin')
ax.set_aspect("auto")

### Time masking

In [None]:
# Apply tfio's time masking to the dB-scaled mel spectrogram.
# NOTE(review): `param` is presumably the maximum length of the masked span,
# in frames -- confirm against the tfio.audio.time_mask documentation.
time_mask = tfio.audio.time_mask(dbscale_mel_spectrogram, param=50)

# The masked array has mel bins on its last axis, not the linear FFT bins
# counted by NF, so take the y-extent from the array itself.
n_freq_bins = int(time_mask.shape[-1])

fig, ax = plt.subplots(figsize=(6,6))
# Transpose so that time runs along x and mel bins along y.
ax.imshow(time_mask.numpy().T, extent=[0, SAMPLE_LENGTH, n_freq_bins, 0])
ax.set(title='Time-masked mel spectrogram',
       xlabel='Time (s)', ylabel='Mel bin')
ax.set_aspect("auto")