In [None]:
import os
import seaborn as sns
import matplotlib.pyplot as plt

# Taking a look at the classes
import json

TRAIN_DATA_PATH = '../input/birdclef-2022/train_audio/'

# Each entry of TRAIN_DATA_PATH is treated as one class label.
# Sorted because os.listdir returns entries in arbitrary order; a stable order
# keeps anything enumerated from `birds` / `AUDIO_PATHS` reproducible across runs.
birds = sorted(os.listdir(TRAIN_DATA_PATH))
print("# Birds: ",len(birds))

# Context manager so the file handle is closed deterministically.
with open('../input/birdclef-2022/scored_birds.json', 'r') as scored_file:
    scored_birds = json.load(scored_file)
print("# Scored birds: ", len(scored_birds))

# bird_frequencies: label -> number of files found for that label
# AUDIO_PATHS:      label -> list of full paths to those files
bird_frequencies = {}
AUDIO_PATHS = {}
for bird_path in birds:
    bird_dir = os.path.join(TRAIN_DATA_PATH, bird_path)
    AUDIO_PATHS[bird_path] = [os.path.join(bird_dir, i) for i in sorted(os.listdir(bird_dir))]
    bird_frequencies[bird_path] = len(AUDIO_PATHS[bird_path])
print("# of total data points:", sum(bird_frequencies.values()))
# Raises KeyError if a scored label has no folder under TRAIN_DATA_PATH.
print("# of data points for scored species: ", sum([bird_frequencies[i] for i in scored_birds]))
sns.barplot(x=[bird_frequencies[i] for i in scored_birds], y=scored_birds)

In [None]:
# Histogram of the per-label file counts over every label in bird_frequencies.
# histplot documents its input as a DataFrame, array, mapping or sequence; a
# dict view is none of those, so materialize it into a list (a flat vector).
sns.histplot(list(bird_frequencies.values()))
plt.title("Song # distribution - all birds")

In [None]:
# Same histogram as for all labels, restricted to the labels in scored_birds.
scored_counts = [bird_frequencies[code] for code in scored_birds]
sns.histplot(scored_counts)
plt.title("Song # distribution - scored birds")

In [None]:
# Visualizing songs

import random

import librosa
# Import the submodule explicitly instead of `from librosa import display`:
# that form rebinds the notebook-level name `display` (IPython's rich-display
# helper) to a librosa module for every later cell.
import librosa.display
from IPython.display import Audio

# Pick one file at random from the "skylar" label (no seed is set, so the
# choice differs from run to run).
AUDIO_FILE = random.choice(AUDIO_PATHS["skylar"])
# sr=None asks librosa to keep the file's native sampling rate.
samples, sample_rate = librosa.load(AUDIO_FILE, sr=None)
print("Sample rate:", sample_rate)
librosa.display.waveshow(samples, sr=sample_rate)
plt.show()
# Last expression of the cell: renders an inline audio player for the file.
Audio(AUDIO_FILE)

In [None]:
# Fourier transform and mel spectrogram
import numpy as np

n_fft = 2048
# Shared by the spectrogram computation and its display so the time axis
# drawn by specshow matches the frames actually computed.
hop_length = 512

# Magnitude spectrum of the first n_fft samples only: the hop is larger than
# the slice, so the STFT yields a single frame.
sns.lineplot(data=np.abs(librosa.stft(samples[:n_fft], n_fft=n_fft, hop_length=n_fft+1)))
plt.show()

# The signal is passed as y=...: librosa >= 0.10 makes melspectrogram's
# arguments keyword-only, so a positional signal raises TypeError there.
S = librosa.feature.melspectrogram(y=samples, sr=sample_rate, n_fft=n_fft, hop_length=hop_length, n_mels=128)
# Convert power to decibels relative to the spectrogram's maximum value.
S_DB = librosa.power_to_db(S, ref=np.max)
librosa.display.specshow(S_DB, sr=sample_rate, hop_length=hop_length, x_axis='time', y_axis='mel')
plt.colorbar(format='%+2.0f dB')

In [None]:
# Note: There's often lots of background sound! Noise removal / augmentation to increase model
# noise tolerance may be an avenue to explore.

In [None]:
# Analyzing song lengths
import collections, tqdm
import pandas as pd
import warnings
# Installs a global "ignore" filter with no category or module restriction, so
# every warning raised after this point in the kernel session is suppressed.
# NOTE(review): presumably meant to quiet per-file warnings from the audio
# loading loop below — confirm, and consider a narrower filter.
warnings.filterwarnings("ignore")

# Create and store mel spectrograms
def scale_minmax(X, min=0.0, max=1.0):
    """Linearly rescale the values of ``X`` into the range ``[min, max]``.

    Args:
        X: NumPy array to rescale. It is not modified.
        min: Value the smallest element of ``X`` is mapped to.
        max: Value the largest element of ``X`` is mapped to.

    Returns:
        A new array with the same shape as ``X``. When every element of ``X``
        is identical, all outputs equal ``min`` (instead of NaN from a 0/0
        division).

    Note:
        The parameter names shadow the built-ins ``min``/``max`` inside this
        function; they are kept for compatibility with keyword callers.
    """
    lowest = X.min()
    value_range = X.max() - lowest
    # A constant input has zero range; dividing by 1 instead leaves the
    # already-zero numerator at 0, so the result collapses to `min`.
    divisor = value_range if value_range != 0 else 1
    X_std = (X - lowest) / divisor
    X_scaled = X_std * (max - min) + min
    return X_scaled
    
# bird_song_lengths: label -> list of clip durations in seconds
# imgs / labels:     parallel lists, one uint8 mel-spectrogram image and one
#                    label per audio file, in iteration order
bird_song_lengths = collections.defaultdict(list)
imgs, labels = [], []
# Only needed by the optional "OTHER BIRD" relabelling noted below.
set_scored_birds = set(scored_birds)
for bird in tqdm.tqdm(birds):
    # Loop-local names are used (rather than AUDIO_FILE / samples /
    # sample_rate / S / S_DB) so this loop does not overwrite the single clip
    # loaded by the earlier visualization cells.
    for audio_path in AUDIO_PATHS[bird]:
        clip, clip_rate = librosa.load(audio_path, sr=None)
        # y=... is required: librosa >= 0.10 makes melspectrogram's arguments
        # keyword-only, so a positional signal raises TypeError there.
        mel = librosa.feature.melspectrogram(y=clip, sr=clip_rate, n_fft=2048, hop_length=512, n_mels=128)
        mel_db = librosa.power_to_db(mel, ref=np.max)
        # Rescale dB values to 0..255 and truncate to 8-bit for compact storage.
        imgs.append(scale_minmax(mel_db, 0, 255).astype(np.uint8))

        # Alternative labelling, kept for reference: collapse every label not
        # in set_scored_birds into a single "OTHER BIRD" class.
        #if bird in set_scored_birds: labels.append(bird)
        #else: labels.append("OTHER BIRD")
        labels.append(bird)

        # Duration in seconds = number of samples / sampling rate.
        bird_song_lengths[bird].append(len(clip) / clip_rate)

In [None]:
# Pair each image's position in `imgs` with its label and write the table to
# labels.csv (the DataFrame index is written as well).
image_label_rows = list(zip(range(len(imgs)), labels))
df = pd.DataFrame(image_label_rows, columns=['Image', 'Label'])
df.to_csv("labels.csv")

In [None]:
# Write every spectrogram image to mel_spectrogram_imgs/imgs<i>.npy, where <i>
# is the image's position in `imgs`.
# The directory is created idempotently and files are addressed by path rather
# than via `!mkdir` + `%cd`: changing the kernel's working directory would
# break the '../input/...' relative paths used by earlier cells and would nest
# a new directory on every re-run of this cell.
spectrogram_dir = "mel_spectrogram_imgs"
os.makedirs(spectrogram_dir, exist_ok=True)
for i in range(len(imgs)):
    np.save(os.path.join(spectrogram_dir, "imgs%d.npy"%i), imgs[i])

In [None]:
def mean(li):
    """Return the arithmetic mean of the numbers in ``li``.

    Raises:
        ZeroDivisionError: if ``li`` is empty.
    """
    total = sum(li)
    count = len(li)
    return total / count

# Histogram of the mean clip duration of each label in bird_song_lengths.
species_mean_lengths = [mean(lengths) for lengths in bird_song_lengths.values()]
sns.histplot(species_mean_lengths)
plt.title("Mean song length distribution across species.")
plt.show()

# Horizontal bars: mean clip duration for each label in scored_birds.
scored_mean_lengths = [mean(bird_song_lengths[code]) for code in scored_birds]
sns.barplot(x=scored_mean_lengths, y=scored_birds)
plt.title("Mean song length across scored birds.")

In [None]:
# Overall extremes: take each label's shortest / longest clip duration, then
# the minimum / maximum across labels.
shortest_per_species = (min(lengths) for lengths in bird_song_lengths.values())
print("Shortest bird song (s):", min(shortest_per_species))
longest_per_species = (max(lengths) for lengths in bird_song_lengths.values())
print("Longest bird song (s):", max(longest_per_species))