In [None]:
import os

import warnings
warnings.filterwarnings(action='ignore')

import pandas as pd
import librosa
import numpy as np

from sklearn.utils import shuffle
from PIL import Image
from tqdm import tqdm
import matplotlib.pyplot as plt

import tensorflow as tf

# Global vars shared by the spectrogram-generation cells below
RANDOM_SEED = 1337  # not referenced in the cells visible here — presumably used by later shuffling/training code; TODO confirm
SAMPLE_RATE = 32000  # rate audio is resampled to on load; also passed as `sr` to the mel spectrogram
SIGNAL_LENGTH = 5 # seconds of audio per chunk (one spectrogram image per chunk)
SPEC_SHAPE = (224, 224) # height x width; height is used as n_mels, width determines the hop length
FMIN = 500  # passed as `fmin` to librosa's mel spectrogram (lower frequency bound)
FMAX = 12500  # passed as `fmax` to librosa's mel spectrogram (upper frequency bound)

In [None]:
# Load the training metadata table from the competition input directory.
TRAIN = pd.read_csv('../input/birdclef-2021/train_metadata.csv')

# Sorted list of the distinct values found in the `primary_label` column.
LABELS = sorted(TRAIN['primary_label'].unique())

In [None]:
# Adapted from https://www.kaggle.com/stefankahl/birdclef2021-model-training/data
def get_spectrograms(filepath, primary_label, output_dir):
    """Split an audio file into fixed-length chunks and save one mel spectrogram PNG per chunk.

    Only the first 15 seconds of the recording are loaded (resampled to
    SAMPLE_RATE). The signal is cut into consecutive, non-overlapping chunks
    of SIGNAL_LENGTH seconds; a trailing chunk shorter than that is dropped.
    Each chunk is turned into a dB-scaled mel spectrogram, min-max normalized
    to [0, 1] and written as an 8-bit grayscale image to
    ``<output_dir>/<primary_label>/<file stem>_<chunk index>.png``.

    Args:
        filepath: Path of the audio file to read.
        primary_label: Name of the sub-directory of ``output_dir`` the images
            are written to.
        output_dir: Root directory for the generated images.

    Returns:
        List of the image paths written, in chunk order. Empty when the
        recording is shorter than one chunk (nothing is written in that case).
    """
    # Open the file with librosa (limited to the first 15 seconds).
    # `offset` is left at its documented default (0.0) instead of passing None,
    # which is outside the documented float contract of librosa.load.
    sig, _ = librosa.load(filepath, sr=SAMPLE_RATE, duration=15)

    # Keep only complete chunks; an incomplete tail is discarded.
    chunk_len = int(SIGNAL_LENGTH * SAMPLE_RATE)
    chunks = [sig[start:start + chunk_len]
              for start in range(0, len(sig) - chunk_len + 1, chunk_len)]
    if not chunks:
        return []

    # Loop-invariant values: output location, file stem and hop length.
    # exist_ok=True avoids a FileExistsError when several worker threads
    # try to create the same label directory at the same time.
    save_dir = os.path.join(output_dir, primary_label)
    os.makedirs(save_dir, exist_ok=True)
    stem = os.path.splitext(os.path.basename(filepath))[0]
    # Hop length chosen so that one chunk yields SPEC_SHAPE[1] frames.
    hop_length = int(SIGNAL_LENGTH * SAMPLE_RATE / (SPEC_SHAPE[1] - 1))

    saved_samples = []
    for idx, chunk in enumerate(chunks):
        mel_spec = librosa.feature.melspectrogram(y=chunk,
                                                  sr=SAMPLE_RATE,
                                                  n_fft=1024,
                                                  hop_length=hop_length,
                                                  n_mels=SPEC_SHAPE[0],
                                                  fmin=FMIN,
                                                  fmax=FMAX)
        mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

        # Normalize to [0, 1]. A constant spectrogram (e.g. a silent chunk)
        # has a peak of 0 after the shift; skip the division there so the
        # image is all zeros rather than NaN.
        mel_spec -= mel_spec.min()
        peak = mel_spec.max()
        if peak > 0:
            mel_spec /= peak

        # Save as 8-bit grayscale image file
        save_path = os.path.join(save_dir, f"{stem}_{idx}.png")
        Image.fromarray(mel_spec * 255.0).convert("L").save(save_path)
        saved_samples.append(save_path)

    return saved_samples

In [None]:
from joblib import Parallel, delayed
import multiprocessing as mp

def create_data(paths, output_dir, parallel=False):
    """Generate spectrogram images for every file found under the given directories.

    Each directory in ``paths`` is walked recursively. For every directory
    visited, the last component of its path is used as the label and
    ``get_spectrograms`` is called once per file it contains.

    Args:
        paths: Iterable of root directories to walk.
        output_dir: Root directory passed through to ``get_spectrograms``.
        parallel: When True, the files of each directory are processed by a
            joblib thread pool with one job per CPU; otherwise they are
            processed one after another in the calling thread.

    Returns:
        None. Progress is reported with ``print``.
    """
    count = 0
    for path in paths:
        for dirname, _, filenames in os.walk(path):
            count += 1
            print("doing " + dirname + " number: " + str(count) + "\t")

            # Last path component is the label (portable equivalent of
            # splitting on "/").
            primary_label = os.path.basename(dirname)

            # Build file paths from the directory os.walk actually reported,
            # so nested directories and roots without a trailing separator
            # resolve correctly.
            audio_paths = [os.path.join(dirname, filename) for filename in filenames]

            if parallel:
                Parallel(prefer='threads', n_jobs=mp.cpu_count(), verbose=0)(
                    delayed(get_spectrograms)(audio_path, primary_label, output_dir)
                    for audio_path in audio_paths
                )
            else:
                for filename, audio_path in zip(filenames, audio_paths):
                    print("done " + str(filename), end="\r")
                    get_spectrograms(audio_path, primary_label, output_dir)

In [None]:
# Root directory that receives the generated spectrogram images.
OUTPUT_DIR = '/kaggle/working/birdclef_2021/melspectrogram_dataset/'

# Pre-create one sub-directory per label. exist_ok=True keeps this cell safe
# to re-run and avoids the check-then-create race of testing os.path.exists
# before calling os.makedirs.
for label in LABELS:
    os.makedirs(os.path.join(OUTPUT_DIR, label), exist_ok=True)

In [None]:
import warnings
# Suppress all warnings for the conversion run below.
warnings.filterwarnings("ignore")
# Directory that create_data walks to find the audio files.
INPUT_DIR = '../input/birdclef-2021/train_short_audio/'
# Alternative input locations, kept for reference (disabled):
# path = '/kaggle/input/birdsong-recognition/train_audio'
# paths = ['/kaggle/input/xeno-canto-bird-recordings-extended-a-m/A-M', '/kaggle/input/xeno-canto-bird-recordings-extended-n-z/N-Z']
paths = [INPUT_DIR]
# Convert everything under INPUT_DIR into spectrogram images below OUTPUT_DIR;
# parallel=True processes the files of each directory with a joblib thread pool.
create_data(paths, OUTPUT_DIR, parallel=True)

In [None]:
# EXAMPLE_SPECIES = 'yetvir'
# files = []
# OUTPUT_DIR = '/kaggle/working/birdclef_2021/melspectrogram_dataset/'  # must match the directory the spectrograms were written to
# for (dirpath, dirnames, filenames) in os.walk(os.path.join(OUTPUT_DIR, EXAMPLE_SPECIES)):
#     files.extend(filenames)

In [None]:
# Plot the first 12 spectrograms saved for EXAMPLE_SPECIES
# plt.figure(figsize=(15, 7))
# for i in range(12):
#     file_path = os.path.join(OUTPUT_DIR, EXAMPLE_SPECIES, files[i])
#     spec = Image.open(file_path)
#     plt.subplot(3, 4, i + 1)
#     plt.title(file_path.split(os.sep)[-1])
#     plt.imshow(spec, origin='lower')
#     plt.savefig(f'{EXAMPLE_SPECIES}: {files[i]}', dpi=400, bbox_inches='tight',pad_inches=0)
#     plt.close('all')