# <span style='color:#A80808'>Objective</span>

This notebook provides a basic guide for preprocessing audio data for deep learning.

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import librosa, librosa.display

# <span style='color:#A80808'>Waveform</span>

In [None]:
# Path to one example recording (fixed, not randomly drawn) from the BirdCLEF 2022 training audio
file = '../input/birdclef-2022/train_audio/calqua/XC109664.ogg'

# Decode the recording into a waveform array at a 22050 Hz sampling rate
signal, sample_rate = librosa.load(file, sr=22050)

# Plot amplitude against time on an explicit figure/axes pair
fig, ax = plt.subplots(figsize=(15, 5))
librosa.display.waveshow(signal, sr=sample_rate, color='red', ax=ax)
ax.set_xlabel('Time (s)')
ax.set_ylabel('Amplitude')
plt.show()

In [None]:
# Duration of the clip in seconds = number of samples / samples per second
signal.shape[0] / sample_rate

# <span style='color:#A80808'>Spectrum</span>

In [None]:
# Fast Fourier transform: magnitude of every frequency component over the whole clip
fft = np.fft.fft(signal)
magnitude = np.abs(fft)
# FFT bin k sits at k * sample_rate / N, so the axis must stop one bin short of
# sample_rate (endpoint=False). Including the endpoint would stretch every bin
# frequency by a factor of N / (N - 1).
frequency = np.linspace(0, sample_rate, len(magnitude), endpoint=False)

# Show the spectrum. Only the left half (0 .. sample_rate / 2) is plotted because,
# for a real-valued signal, the right half is its mirror image (symmetric).
half = len(frequency) // 2
plt.figure(figsize=(15,3))
plt.plot(frequency[:half], magnitude[:half], color='green')
plt.xlabel('Frequency (Hz)')
plt.ylabel('Magnitude')
plt.show()

# <span style='color:#A80808'>Spectrogram</span>

[wiki](https://en.wikipedia.org/wiki/Spectrogram)

In [None]:
# Short-time Fourier transform (STFT) for computing the spectrogram
n_fft = 2048 # signal window size for stft (samples per analysis window)
hop_length = 512 # window shifting = distance between two neighbor window centers
stft = librosa.stft(signal, n_fft=n_fft, hop_length=hop_length)

# Magnitude of the complex STFT, then converted to decibels for display
spectrogram = np.abs(stft)
log_spectrogram = librosa.amplitude_to_db(spectrogram)

# Show the spectrogram.
# x_axis / y_axis tell specshow to convert frame and bin indices into seconds and Hz;
# without them the plot has no ticks and the axis labels below are not backed by data.
plt.figure(figsize=(15,5))
librosa.display.specshow(log_spectrogram, sr=sample_rate, hop_length=hop_length,
                         x_axis='time', y_axis='hz')
plt.colorbar(format='%+2.0f dB')
plt.xlabel('Time (s)')
plt.ylabel('Frequency (Hz)')
plt.show()

# <span style='color:#A80808'>Mel-frequency cepstral coefficients (MFCCs)</span>

[wiki](https://en.wikipedia.org/wiki/Mel-frequency_cepstrum)

In [None]:
# Mel-frequency cepstral coefficients computed over short-time frames of the signal
n_fft = 2048 # signal window size for stft
hop_length = 512 # window shifting = distance between two neighbor window centers
# Pass the framing parameters explicitly so the features are computed with the same
# n_fft / hop_length that specshow uses below, instead of relying on library defaults.
mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=5,
                            n_fft=n_fft, hop_length=hop_length)

# Show mfcc: one row per coefficient, one column per frame.
# x_axis='time' converts frame indices to seconds so the time label matches the ticks.
plt.figure(figsize=(15,5))
librosa.display.specshow(mfcc, sr=sample_rate, hop_length=hop_length, x_axis='time')
plt.colorbar()
plt.xlabel('Time (s)')
plt.ylabel('MFCC')
plt.show()