In [None]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
import seaborn as sns
import IPython.display as ipd
from tqdm import tqdm
from PIL import Image
import tensorflow as tf
import cv2
import skimage.io
import librosa
import librosa.display
import sklearn.model_selection as sk
import tensorflow as tf
from keras.callbacks import EarlyStopping
from keras.callbacks import ModelCheckpoint

In [None]:
# Input/output locations (relative to the notebook's working directory).
# Directory listed by extract_links_and_labels: one sub-folder per bird, each holding audio clips.
train_datadir = "../input/birdclef-2021/train_short_audio"
# Directory of soundscape recordings that are converted to spectrograms and scored below.
soundscapes = '../input/birdclef-2021/train_soundscapes'
# CSV files loaded with pandas in the next cells.
train_csv = "../input/birdclef-2021/train_metadata.csv"
test_csv = "../input/birdclef-2021/test.csv"
train_soundscape = "../input/birdclef-2021/train_soundscape_labels.csv"
# NOTE(review): output_dir is not referenced by any visible cell; the later `!rm -rf kaggle/working`
# uses a different relative path -- confirm which one is intended.
output_dir = "../output/kaggle/working"

In [None]:
# Audio / spectrogram configuration shared by the cells below.
sample_rate = 32000  # samples per second; passed to librosa.load and melspectrogram
signal_length = 5  # length in seconds of each chunk cut by get_spectrograms

# Maximum number of seconds read per file. get_spectrograms reads this global at call time,
# and a later cell reassigns it to 600 before the soundscapes are processed.
duration = 15
# NOTE(review): not referenced by the visible code (librosa.load is called with offset=None).
offset = 0.0
# NOTE(review): get_spectrograms computes its own local hop_length, so this value is unused there.
hop_length = 512 # number of samples per time-step in spectrogram
# NOTE(review): get_spectrograms passes n_mels=spec_shape[0]; this value is unused there.
n_mels = 128 # number of bins in spectrogram. Height of image
# NOTE(review): not referenced by any visible cell.
time_steps = 384 # number of time-steps. Width of image
# Predictions whose top score is below this value are written as 'nocall' in the submission.
threshold = 0.1
# Frequency band passed to librosa.feature.melspectrogram.
fmin = 500
fmax = 12500

# (mel bins, time frames) of every spectrogram image: spec_shape[0] is used as n_mels and
# spec_shape[1] determines the hop length inside get_spectrograms.
spec_shape = (48, 128)

In [None]:
# Load the competition CSV files into DataFrames.
df_train_metadata = pd.read_csv(train_csv)  # filtered by 'rating' in the following cells
df_train_soundscape = pd.read_csv(train_soundscape)
df_test = pd.read_csv(test_csv)

In [None]:
df_train_metadata.hist(column='rating', figsize=(10, 5))

In [None]:
# Boolean mask selecting recordings whose 'rating' is strictly greater than 3.
is_good_quality =  df_train_metadata['rating'] > 3

In [None]:
df_train_metadata.shape

In [None]:
# Keep only the rows selected by the rating mask; the original frame is left untouched.
df_train_metadata_filtered = df_train_metadata[is_good_quality]

In [None]:
df_train_metadata_filtered.shape

In [None]:
# File names of the retained recordings, as a list.
# NOTE(review): apart from the length print below, later cells use the `filtered_files` set instead.
good_quality_audios = df_train_metadata_filtered['filename'].tolist()

In [None]:
print(len(good_quality_audios))

In [None]:
df_train_metadata_filtered['primary_label'].value_counts()[:200].plot(kind="bar", figsize=(25, 12), rot=90)

In [None]:
df_train_metadata_filtered.shape

In [None]:
# Set of retained file names; extract_links_and_labels tests membership against it for every clip.
filtered_files = set(df_train_metadata_filtered['filename'].tolist())

In [None]:
len(filtered_files)

In [None]:
def scale_minmax(x, min=0.0, max=1.0):
    """Linearly rescale ``x`` so that its values span ``[min, max]``.

    Parameters
    ----------
    x : array-like
        Values to rescale. Must be non-empty.
    min, max : float
        Lower and upper bound of the output range.

    Returns
    -------
    numpy.ndarray
        Array with the same shape as ``x``. When every element of ``x`` is
        identical (zero value range) the result is filled with ``min``
        instead of the NaNs a plain ``0 / 0`` division would produce.
    """
    x = np.asarray(x)
    lowest = x.min()
    value_range = x.max() - lowest
    shifted = x - lowest
    if value_range == 0:
        # Constant input (e.g. a silent audio chunk): avoid dividing by zero.
        x_std = shifted * 0.0
    else:
        x_std = shifted / value_range
    return x_std * (max - min) + min

In [None]:
def extract_links_and_labels(data_dir, allowed_files=None):
    """Collect the audio clips found under ``data_dir`` that passed the quality filter.

    ``data_dir`` is expected to contain one sub-directory per bird; every file
    in a sub-directory whose name is in ``allowed_files`` is kept.

    Parameters
    ----------
    data_dir : str
        Root directory holding one folder per bird.
    allowed_files : collection of str, optional
        File names to keep. Defaults to the notebook-level ``filtered_files``
        set, which is what the original implementation always used.

    Returns
    -------
    tuple of four lists, aligned index by index:
        folder_names : bird folder of each kept clip
        file_names   : clip name with the ``.ogg`` suffix removed
        audio_clips  : full path of each clip
        labels       : label of each clip (identical to the folder name)
    """
    if allowed_files is None:
        allowed_files = filtered_files

    folder_names = []
    file_names = []
    audio_clips = []
    labels = []
    # os.listdir returns entries in arbitrary order; sort so that the result
    # (and any label ordering derived from it) is reproducible between runs.
    for bird in sorted(os.listdir(data_dir)):
        bird_dir = os.path.join(data_dir, bird)
        if not os.path.isdir(bird_dir):
            # Stray files next to the bird folders would make os.listdir fail.
            continue
        for clip in sorted(os.listdir(bird_dir)):
            if clip not in allowed_files:
                continue
            folder_names.append(bird)
            file_names.append(clip.split(".ogg")[0])
            audio_clips.append(os.path.join(bird_dir, clip))
            labels.append(bird)
    return folder_names, file_names, audio_clips, labels

In [None]:
folder_names, file_names, train_audio_clips, labels = extract_links_and_labels(train_datadir)

In [None]:
len(train_audio_clips)

In [None]:
!rm -rf kaggle/working
!mkdir train_images

In [None]:
def get_spectrograms(filepath, primary_label, directory, max_duration=None):
    """Cut an audio file into fixed-length chunks and save each chunk as a mel-spectrogram PNG.

    The file is loaded at ``sample_rate`` and split into consecutive,
    non-overlapping chunks of ``signal_length`` seconds; a trailing chunk that
    is shorter than that is dropped. Every chunk becomes an 8-bit grayscale
    image (low frequencies at the bottom, darker == more energy) stored as
    ``<directory>/<primary_label>/<file stem>_<end second>.png`` where the end
    second is 5, 10, 15, ...

    Relies on the notebook-level settings ``sample_rate``, ``signal_length``,
    ``duration``, ``spec_shape``, ``fmin``, ``fmax`` and on ``scale_minmax``.

    Parameters
    ----------
    filepath : str
        Path of the audio file to read.
    primary_label : str
        Name of the sub-folder of ``directory`` the images are written to.
    directory : str
        Root output directory (with or without a trailing separator).
    max_duration : float, optional
        Maximum number of seconds to read. Defaults to the notebook-level
        ``duration`` at call time, which is what the original always used.

    Returns
    -------
    list of str
        Paths of the images written, in chunk order (empty when the audio is
        shorter than one chunk).
    """
    if max_duration is None:
        max_duration = duration

    # offset=0.0 is librosa's documented default; the original passed None.
    sig, rate = librosa.load(filepath, sr=sample_rate, offset=0.0, duration=max_duration)

    # Split the signal into full-length chunks only.
    chunk_len = int(signal_length * sample_rate)
    n_chunks = len(sig) // chunk_len
    sig_splits = [sig[k * chunk_len:(k + 1) * chunk_len] for k in range(n_chunks)]
    if not sig_splits:
        return []

    # Loop-invariant values, computed once.
    # Hop length chosen so that one chunk yields spec_shape[1] time frames.
    hop_length = int(signal_length * sample_rate / (spec_shape[1] - 1))
    folder_dir = os.path.join(directory, primary_label)
    os.makedirs(folder_dir, exist_ok=True)
    file_stem = os.path.splitext(os.path.basename(filepath))[0]

    saved_samples = []
    s_cnt = 5  # label of the first chunk; incremented by 5 per chunk
    for chunk in sig_splits:
        mel_spec = librosa.feature.melspectrogram(y=chunk,
                                                  sr=sample_rate,
                                                  n_fft=1024,
                                                  hop_length=hop_length,
                                                  n_mels=spec_shape[0],
                                                  fmin=fmin,
                                                  fmax=fmax)

        mel_spec = np.log(mel_spec + 1e-9) # add small number to avoid log(0)

        # min-max scale to fit inside 8-bit range; a silent (constant) chunk can
        # produce NaN from a 0/0 division, which is mapped to 0 before the cast.
        scaled = np.nan_to_num(scale_minmax(mel_spec, 0, 255), nan=0.0)
        mel_spec = scaled.astype(np.uint8)
        mel_spec = np.flip(mel_spec, axis=0) # put low frequencies at the bottom in image
        mel_spec = 255 - mel_spec # invert. make black==more energy

        save_path = os.path.join(folder_dir, file_stem + '_' + str(s_cnt) + '.png')
        skimage.io.imsave(save_path, mel_spec)
        saved_samples.append(save_path)
        s_cnt += 5

    return saved_samples


In [None]:
# for i in tqdm(range(len(train_audio_clips))):
#     get_spectrograms(train_audio_clips[i], folder_names[i], './train_images/') 

In [None]:
def imagetensor(imagedir, limit=None):
    """Load the grayscale images of one directory into a single float array.

    Parameters
    ----------
    imagedir : str
        Directory whose files are read with ``cv2.imread`` in grayscale mode.
    limit : int, optional
        Maximum number of images to load. When omitted, the notebook-level
        ``counter_limit`` is used if it exists (the original required that
        global and raised NameError without it); otherwise all images are loaded.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(n_images,) + spec_shape`` with pixel values
        divided by 255. An empty or unreadable directory yields an array with
        zero rows instead of an UnboundLocalError.
    """
    if limit is None:
        limit = globals().get('counter_limit')

    loaded = []
    for im in os.listdir(imagedir):
        if limit is not None and len(loaded) >= limit:
            break
        image = cv2.imread(os.path.join(imagedir, im), 0)
        if image is None:
            # cv2.imread returns None for anything it cannot decode
            # (sub-directories, checkpoints, ...); skip instead of crashing.
            continue
        # NOTE(review): ndarray.resize reshapes the flat pixel buffer in place; it does not
        # interpolate. It is a no-op for images that already have spec_shape.
        image.resize(spec_shape)
        loaded.append(np.array(image, dtype=float) / 255)

    if not loaded:
        return np.empty((0,) + tuple(spec_shape), dtype=float)
    # Stack once at the end instead of np.append per image (which copies the
    # whole array on every iteration).
    return np.stack(loaded, axis=0)

In [None]:
# labels = []
# for label in os.listdir('./train_images/'):
#     if (not '.ipynb' in label and 
#         not 'h5'in label and 
#         not '.npy' in label and 
#         not '.txt' in label and 
#         not 'out' in label):
#         labels.append(label)
# print(len(labels))

In [None]:
# counter_limit = 500
# x_train = np.array([])
# y_train = []
# for idx, label in tqdm(enumerate(tqdm(labels))):
#     if idx == 0:
#         x_train= imagetensor('./train_images/' + label)
#         y_train.extend([label] * len(x_train))
#     else:
#         images = imagetensor('./train_images/' + label)
#         y_train.extend([label] * len(images))
#         x_train = np.vstack((x_train, images))

In [None]:
# inputs = x_train.reshape((x_train.shape[0],x_train.shape[1],x_train.shape[2], 1))
# outputs = pd.get_dummies(pd.Series(y_train)).to_numpy()

In [None]:
# print(inputs.shape, outputs.shape)

In [None]:
# np.savez_compressed('batch_reduced_48_128', x=inputs, y=outputs)

In [None]:
# b = os.path.getsize("batch_reduced_48_128.npz")
# print(b)

In [None]:
# batch_1 = np.load('batch_reduced_48_128.npz')

# inputs = batch_1['x']
# outputs = batch_1['y']

In [None]:
# SPEC_SHAPE = inputs[0].shape
# OUTPUT_SHAPE = outputs[0].shape
# SEED = 8000

In [None]:
# from sklearn.utils import shuffle
# inputs, outputs = shuffle(inputs, outputs)

In [None]:
# # Make sure your experiments are reproducible
# tf.random.set_seed(SEED)

# # Build a simple model as a sequence of  convolutional blocks.
# # Each block has the sequence CONV --> RELU --> BNORM --> MAXPOOL.
# # Finally, perform global average pooling and add 2 dense layers.
# # The last layer is our classification layer and is softmax activated.
# # (Well it's a multi-label task so sigmoid might actually be a better choice)
# model = tf.keras.Sequential([
    
#     # First conv block
#     tf.keras.layers.Conv2D(16, (3, 3), 
#                            activation='relu', 
#                            input_shape=(SPEC_SHAPE[0], SPEC_SHAPE[1], 1)),
#     tf.keras.layers.BatchNormalization(),
#     tf.keras.layers.MaxPooling2D((2, 2)),
    
#     # Second conv block
#     tf.keras.layers.Conv2D(32, (3, 3), activation='relu'),
#     tf.keras.layers.BatchNormalization(),
#     tf.keras.layers.MaxPooling2D((2, 2)), 
    
#     # Third conv block
#     tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
#     tf.keras.layers.BatchNormalization(),
#     tf.keras.layers.MaxPooling2D((2, 2)), 
    
#     # Fourth conv block
#     tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
#     tf.keras.layers.BatchNormalization(),
#     tf.keras.layers.MaxPooling2D((2, 2)),
    
#     # Global pooling instead of flatten()
#     tf.keras.layers.GlobalAveragePooling2D(), 
    
#     # Dense block
#     tf.keras.layers.Dense(256, activation='relu'),   
#     tf.keras.layers.Dropout(0.5),  
#     tf.keras.layers.Dense(256, activation='relu'),   
#     tf.keras.layers.Dropout(0.5),
    
#     # Classification layer
#     tf.keras.layers.Dense(OUTPUT_SHAPE[0], activation='softmax')
# ])
# print('MODEL HAS {} PARAMETERS.'.format(model.count_params()))

In [None]:
# model.compile(
#     optimizer=tf.keras.optimizers.Adam(learning_rate=0.0015),
#     loss=tf.keras.losses.CategoricalCrossentropy(),
#     metrics=['accuracy', tf.keras.metrics.AUC()]
# )

In [None]:
# model.summary()

In [None]:
# early_stopping_monitor = EarlyStopping(
#     monitor='val_accuracy',
#     patience=100,
#     verbose=1,
#     mode='auto',
#     restore_best_weights=True
# )

# mcp_save = ModelCheckpoint(
#     './best_model.h5', 
#     save_best_only=True, 
#     monitor='val_accuracy',
#     verbose=1
# )

In [None]:
# model.fit(
#     inputs, 
#     outputs, 
#     epochs=1000,
#     batch_size=256, 
#     validation_split=0.2, 
#     callbacks=[early_stopping_monitor, mcp_save]
# )

In [None]:
!mkdir train_soundscapes

In [None]:
# List the soundscape directory once and derive two aligned lists from it:
# the full path of every entry and the entry name up to its first '.'.
_soundscape_entries = os.listdir(soundscapes)
soundscape_links = [soundscapes + '/' + entry for entry in _soundscape_entries]
filenames = [entry.split('.')[0] for entry in _soundscape_entries]

In [None]:
# get_spectrograms reads the notebook-level `duration`, so raise it to 600 seconds
# before converting the soundscapes (it was 15 for the short training clips).
duration = 600
for link, name in tqdm(zip(soundscape_links, filenames), total=len(soundscape_links)):
    get_spectrograms(link, name, 'train_soundscapes/')

In [None]:
# Build the spectrogram image paths soundscape by soundscape, in chunk order
# (5, 10, ..., 600 seconds). The soundscapes are taken from `filenames`, i.e. in
# the same order as `soundscape_links`, which is the order the row ids are
# generated in further down. Listing './train_soundscapes' again (as before)
# could return the folders in a different order and pair predictions with the
# wrong row id.
sorted_complete_links = []
sorted_col = []
for soundscape in filenames:
    for counter in range(5, 601, 5):
        sorted_complete_links.append(
            './train_soundscapes/' + soundscape + '/' + soundscape + '_' + str(counter) + '.png'
        )

In [None]:
len(sorted_complete_links)

In [None]:
def imagetensor_2():
    """Load every image listed in ``sorted_complete_links`` into one float array.

    Reads the notebook-level ``sorted_complete_links`` (image paths) and
    ``spec_shape``.

    Returns
    -------
    numpy.ndarray
        Float array of shape ``(len(sorted_complete_links),) + spec_shape``
        with pixel values divided by 255, rows in the order of the link list.

    Raises
    ------
    FileNotFoundError
        If an image cannot be read. Skipping it would shift every following
        prediction onto the wrong row id, so the failure is made explicit.
    """
    loaded = []
    for link in sorted_complete_links:
        image = cv2.imread(link, 0)
        if image is None:
            raise FileNotFoundError('could not read spectrogram image: ' + str(link))
        # NOTE(review): ndarray.resize reshapes the flat pixel buffer in place; it does not
        # interpolate. It is a no-op for images that already have spec_shape.
        image.resize(spec_shape)
        loaded.append(np.array(image, dtype=float) / 255)

    if not loaded:
        return np.empty((0,) + tuple(spec_shape), dtype=float)
    # Stack once instead of np.append per image (which copies the whole array each time).
    return np.stack(loaded, axis=0)

In [None]:
x_test = imagetensor_2()

In [None]:
print(x_test.shape)

In [None]:
# Add the trailing channel axis expected by the model. The row count is inferred (-1)
# instead of being hard-coded to 2400, so the cell works for any number of soundscapes.
x_test = x_test.reshape((-1,) + tuple(spec_shape) + (1,))

In [None]:
# np.savez_compressed('test_batch_reduced_48_128', x=x_test)

In [None]:
!rm -rf train_images

In [None]:
!rm -rf train_soundscapes

In [None]:
!rm -rf batch_reduced_48_128.npz

In [None]:
# Load the pre-trained classifier from the attached dataset.
# NOTE(review): imports belong in the first cell; `tf` is already imported there, so
# `tf.keras.models.load_model` would avoid this extra import.
from tensorflow import keras
model = keras.models.load_model('../input/birds-2/best_model.h5')

In [None]:
model.save('best_model.h5')

In [None]:
# p: model output for every spectrogram in x_test (one row per image).
p = model.predict(x_test)
# Despite its name, y_prob holds the index of the highest-scoring class per row, not a probability.
y_prob = np.argmax(p, axis=1)

In [None]:
# Unique bird labels in sorted order. The class index predicted by the model is
# looked up in this list, so its order must match the order used at training time.
# The (commented-out) training cell one-hot encodes labels with pd.get_dummies,
# whose columns are sorted, whereas first-seen order in `folder_names` depends on
# the arbitrary order returned by os.listdir.
# NOTE(review): assumes the loaded model was trained with that cell -- confirm.
primary_labels = sorted(set(folder_names))

In [None]:
len(primary_labels)

In [None]:
# One row id per 5-second window of every soundscape: the first two
# '_'-separated parts of the file name followed by the window end second.
sorted_file_links = []
for link in soundscape_links:
    name_parts = link.split('/')[-1].split('_')
    row_prefix = name_parts[0] + '_' + name_parts[1] + '_'
    sorted_file_links.extend(row_prefix + str(second) for second in range(5, 601, 5))
print(sorted_file_links[-1])

In [None]:
# Turn each prediction row into a submission record: the top class is reported
# unless its score is below `threshold`, in which case the row is 'nocall'.
submission = []
for i, scores in enumerate(p):
    row_id = sorted_file_links[i]
    top_class = y_prob[i]
    birds = 'nocall' if scores[top_class] < threshold else primary_labels[top_class]
    submission.append({'row_id': row_id, 'birds': birds})

In [None]:
df_submission = pd.DataFrame(submission)
df_submission.shape

In [None]:
df_submission.to_csv("submission.csv", index=False)