In [None]:
import IPython.display as ipd
import librosa
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import scipy.signal as signal
import soundfile as sf
from librosa import display as ld
from matplotlib.patches import Rectangle

In [None]:
# Root folder of the competition dataset.
data_dir = '/kaggle/input/rfcx-species-audio-detection/'

# Annotations from train_tp.csv, extended with the duration (t_dif) and the
# frequency span (f_dif) of every annotated time/frequency box.
df_t = pd.read_csv(data_dir+'train_tp.csv').assign(
    t_dif=lambda frame: frame["t_max"] - frame["t_min"],
    f_dif=lambda frame: frame["f_max"] - frame["f_min"],
)

# Annotations from train_fp.csv get the same two derived columns.
df_f = pd.read_csv(data_dir+'train_fp.csv').assign(
    t_dif=lambda frame: frame["t_max"] - frame["t_min"],
    f_dif=lambda frame: frame["f_max"] - frame["f_min"],
)

# Preview the first rows of the train_tp table.
df_t.head()

### Get example

In [None]:
# Use the first row of df_t as the running example for every plot below,
# and load the FLAC recording that row points to.
example = df_t.iloc[0]
flac_path = data_dir + 'train/' + example["recording_id"] + '.flac'
data, samplerate = sf.read(flac_path)

### Audio sound

In [None]:
# Inline audio player for the loaded recording.
# `ipd` (IPython.display) is imported with the other dependencies in the
# notebook's first cell, so this cell only builds the widget.
ipd.Audio(data, rate=samplerate)

In [None]:
# Time stamp (in seconds) of every sample: sample k is played at k / samplerate.
# np.arange is used instead of np.linspace(0, len(data), len(data)), whose step is
# len(data) / (len(data) - 1) samples and therefore slightly stretches the time axis.
times = np.arange(len(data)) / samplerate

# Raw waveform of the example recording.
plt.figure(figsize=(30, 10))
plt.plot(times, data)
plt.xlabel('Time [sec]')
plt.ylabel('Amplitude')
plt.show()

# SPECTROGRAM

In [None]:
# Spectrogram computed and drawn by matplotlib itself.
fig, ax = plt.subplots(figsize=(30, 10))
Pxx, freqs, bins, im = ax.specgram(data, Fs=samplerate)

ax.set_ylabel('Frequency [Hz]')
ax.set_xlabel('Time [sec]')

# Green outline of the annotated box: lower-left corner at (t_min, f_min),
# t_dif wide and f_dif tall.
audio_position = Rectangle(
    xy=(example['t_min'], example['f_min']),
    width=example['t_dif'],
    height=example['f_dif'],
    linewidth=3,
    edgecolor='g',
    facecolor='none',
)
ax.add_patch(audio_position)

fig.colorbar(im, ax=ax)
plt.show()

# STFT

In [None]:
# Short-time Fourier transform of the recording (complex-valued).
sftf = librosa.stft(data)

# specshow draws real values; take the magnitude explicitly instead of handing
# it the complex matrix and relying on an implicit conversion.
sftf_mag = np.abs(sftf)

fig, ax = plt.subplots(figsize=(30, 10))
mesh = ld.specshow(sftf_mag, sr=samplerate, x_axis='time', y_axis='hz', ax=ax)

# Green outline of the annotated box: lower-left corner at (t_min, f_min),
# t_dif wide and f_dif tall.
audio_position = Rectangle((example['t_min'], example['f_min']), example['t_dif'], example['f_dif'],
                           linewidth=3, edgecolor='g', facecolor='none')
ax.add_patch(audio_position)

fig.colorbar(mesh, ax=ax)
plt.show()

# STFT (amplitude_to_db)

In [None]:
# STFT magnitude converted to decibels before plotting.
sftf = librosa.stft(data)
sftf_xdb = librosa.amplitude_to_db(np.abs(sftf))

fig, ax = plt.subplots(figsize=(30, 10))
mesh = ld.specshow(sftf_xdb, sr=samplerate, x_axis='time', y_axis='hz', ax=ax)

# Green outline of the annotated box: lower-left corner at (t_min, f_min),
# t_dif wide and f_dif tall.
audio_position = Rectangle(
    xy=(example['t_min'], example['f_min']),
    width=example['t_dif'],
    height=example['f_dif'],
    linewidth=3,
    edgecolor='g',
    facecolor='none',
)
ax.add_patch(audio_position)

fig.colorbar(mesh, ax=ax)
plt.show()

# MEL SPECTROGRAM

In [None]:
# Mel-scaled spectrogram of the recording.
# The audio is passed as `y=`: recent librosa releases accept it by keyword only.
melspec = librosa.feature.melspectrogram(y=data, sr=samplerate)

fig, ax = plt.subplots(figsize=(30, 10))
# The rows are mel bands, so the frequency axis must be 'mel'; with 'hz' the rows
# are labelled as if they were linearly spaced and the Hz box below is misplaced.
mesh = ld.specshow(melspec, sr=samplerate, x_axis='time', y_axis='mel', ax=ax)

# Green outline of the annotated box: lower-left corner at (t_min, f_min),
# t_dif wide and f_dif tall (the mel axis is still expressed in Hz).
audio_position = Rectangle((example['t_min'], example['f_min']), example['t_dif'], example['f_dif'],
                           linewidth=3, edgecolor='g', facecolor='none')
ax.add_patch(audio_position)

fig.colorbar(mesh, ax=ax)
plt.show()


# MEL SPECTROGRAM (dB scale)

In [None]:
# Mel-scaled spectrogram of the recording, shown in decibels.
# The audio is passed as `y=`: recent librosa releases accept it by keyword only.
melspec = librosa.feature.melspectrogram(y=data, sr=samplerate)

# melspectrogram returns a power spectrogram by default (power=2.0), so the
# matching conversion is power_to_db; amplitude_to_db would square the values
# again and double every dB figure.
melspec_xdb = librosa.power_to_db(melspec)

fig, ax = plt.subplots(figsize=(30, 10))
# The rows are mel bands, so the frequency axis must be 'mel'; with 'hz' the rows
# are labelled as if they were linearly spaced and the Hz box below is misplaced.
mesh = ld.specshow(melspec_xdb, sr=samplerate, x_axis='time', y_axis='mel', ax=ax)

# Green outline of the annotated box: lower-left corner at (t_min, f_min),
# t_dif wide and f_dif tall (the mel axis is still expressed in Hz).
audio_position = Rectangle((example['t_min'], example['f_min']), example['t_dif'], example['f_dif'],
                           linewidth=3, edgecolor='g', facecolor='none')
ax.add_patch(audio_position)

fig.colorbar(mesh, ax=ax, format='%+2.0f dB')
plt.show()

# MFCC

In [None]:
# Mel-frequency cepstral coefficients of the recording.
# The audio is passed as `y=`: recent librosa releases accept it by keyword only.
mfcc = librosa.feature.mfcc(y=data, sr=samplerate)

fig, ax = plt.subplots(figsize=(30, 10))
# Each row is a cepstral coefficient, not a frequency, so no 'hz' axis is requested.
mesh = ld.specshow(mfcc, sr=samplerate, x_axis='time', ax=ax)
ax.set_ylabel('MFCC coefficient')

# Only the time extent of the annotation can be shown here: its f_min/f_max are
# in Hz and have no position on a coefficient axis.
audio_position = ax.axvspan(example['t_min'], example['t_max'],
                            linewidth=3, edgecolor='g', facecolor='none')

fig.colorbar(mesh, ax=ax)
plt.show()

# MFCC (amplitude_to_db)

In [None]:
# Mel-frequency cepstral coefficients of the recording, magnitude shown in decibels.
# The audio is passed as `y=`: recent librosa releases accept it by keyword only.
mfcc = librosa.feature.mfcc(y=data, sr=samplerate)

# NOTE(review): MFCCs are already computed from a log-scaled mel spectrogram, so this
# extra dB conversion is kept only because it is what this cell sets out to show.
mfcc_xdb = librosa.amplitude_to_db(np.abs(mfcc))

fig, ax = plt.subplots(figsize=(30, 10))
# Each row is a cepstral coefficient, not a frequency, so no 'hz' axis is requested.
mesh = ld.specshow(mfcc_xdb, sr=samplerate, x_axis='time', ax=ax)
ax.set_ylabel('MFCC coefficient')

# Only the time extent of the annotation can be shown here: its f_min/f_max are
# in Hz and have no position on a coefficient axis.
audio_position = ax.axvspan(example['t_min'], example['t_max'],
                            linewidth=3, edgecolor='g', facecolor='none')

fig.colorbar(mesh, ax=ax)
plt.show()

# CHROMA_STFT

In [None]:
# Chromagram (energy per pitch class) computed from the recording's STFT.
# The audio is passed as `y=`: recent librosa releases accept it by keyword only.
chroma = librosa.feature.chroma_stft(y=data, sr=samplerate)

fig, ax = plt.subplots(figsize=(30, 10))
# Rows are pitch classes, so the vertical axis is 'chroma' rather than 'hz'.
mesh = ld.specshow(chroma, sr=samplerate, x_axis='time', y_axis='chroma', ax=ax)

# Only the time extent of the annotation can be shown here: its f_min/f_max are
# in Hz and have no position on a pitch-class axis.
audio_position = ax.axvspan(example['t_min'], example['t_max'],
                            linewidth=3, edgecolor='g', facecolor='none')

fig.colorbar(mesh, ax=ax)
plt.show()

# CHROMA_STFT (amplitude_to_db)

In [None]:
# Chromagram (energy per pitch class) of the recording, shown in decibels.
# The audio is passed as `y=`: recent librosa releases accept it by keyword only.
chroma = librosa.feature.chroma_stft(y=data, sr=samplerate)
chroma_xdb = librosa.amplitude_to_db(np.abs(chroma))

fig, ax = plt.subplots(figsize=(30, 10))
# Rows are pitch classes, so the vertical axis is 'chroma' rather than 'hz'.
mesh = ld.specshow(chroma_xdb, sr=samplerate, x_axis='time', y_axis='chroma', ax=ax)

# Only the time extent of the annotation can be shown here: its f_min/f_max are
# in Hz and have no position on a pitch-class axis.
audio_position = ax.axvspan(example['t_min'], example['t_max'],
                            linewidth=3, edgecolor='g', facecolor='none')

fig.colorbar(mesh, ax=ax)
plt.show()