In [None]:
import numpy as np
import pandas as pd
import librosa
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, Callback
from tensorflow.keras.utils import to_categorical
from tqdm import tqdm
import glob
import os
from concurrent.futures import ThreadPoolExecutor, as_completed
import joblib

In [None]:
# Root folder of the training recordings; the metadata 'filename' values are
# joined onto this path when each row is processed.
data_dir = '/kaggle/input/birdclef-2024/train_audio'

# Training metadata table, read once and reused by the extraction cell
metadata_csv = '/kaggle/input/birdclef-2024/train_metadata.csv'
metadata = pd.read_csv(metadata_csv)

In [None]:
# Function to extract features from audio files
def extract_features(file_path):
    """Summarise one recording as a vector of time-averaged log-mel energies.

    Parameters
    ----------
    file_path : str
        Path of the audio file handed to ``librosa.load``. ``sr=None`` is
        passed through, so the sampling rate is whatever librosa reports for
        the file rather than a fixed value chosen here.

    Returns
    -------
    numpy.ndarray
        One value per mel band (128 bands are requested): the dB-scaled mel
        spectrogram averaged over its other axis.
    """
    waveform, sample_rate = librosa.load(file_path, sr=None)
    log_mel = librosa.power_to_db(
        librosa.feature.melspectrogram(y=waveform, sr=sample_rate, n_mels=128),
        ref=np.max,
    )
    # Transpose first, then average over axis 0, so each mel band collapses
    # to a single number.
    return np.mean(log_mel.T, axis=0)

# Function to process a single row of the metadata
def process_row(row):
    """Turn one metadata row into a ``(features, label)`` pair.

    Parameters
    ----------
    row : mapping
        Must provide ``'filename'`` (joined onto ``data_dir``) and
        ``'primary_label'``.

    Returns
    -------
    tuple
        ``(extract_features(path), row['primary_label'])`` when the audio file
        exists on disk, otherwise ``(None, None)`` so the caller can skip it.
    """
    audio_path = os.path.join(data_dir, row['filename'])

    # Guard clause: rows pointing at a missing file yield the sentinel pair.
    if not os.path.exists(audio_path):
        return None, None

    label = row['primary_label']
    return extract_features(audio_path), label

In [None]:
# Extract features and labels using parallel processing.
# The work still runs concurrently on the thread pool, but results are gathered
# in metadata row order rather than completion order. Completion order depends
# on thread scheduling, which would give X / y a different row order on every
# run and make the seeded train/test split below non-reproducible.
features = []
labels = []

with ThreadPoolExecutor(max_workers=8) as executor:
    futures = [executor.submit(process_row, row) for _, row in metadata.iterrows()]
    for future in tqdm(futures, total=len(futures)):
        data, class_label = future.result()
        # process_row returns (None, None) for rows whose audio file is missing
        if data is not None:
            features.append(data)
            labels.append(class_label)

# Convert to numpy arrays (row i of X corresponds to entry i of y)
X = np.array(features)
y = np.array(labels)

# Encode labels: string label -> integer class index -> one-hot row
le = LabelEncoder().fit(y)
y_encoded = le.transform(y)
y_onehot = to_categorical(y_encoded)

# Persist the fitted encoder; the inference cell reloads it to recover the
# class order used for the output columns.
joblib.dump(le, 'label_encoder.pkl')

# Split data into training and testing sets (80 / 20, fixed seed)
X_train, X_test, y_train, y_test = train_test_split(X, y_onehot, test_size=0.2, random_state=42)

# Reshape data for the CNN. The model below declares input_shape=(128, 1, 1),
# i.e. rank-4 batches of shape (samples, 128, 1, 1), and the inference cell
# builds its samples as (128, 1, 1) as well. Reshape to that rank explicitly
# instead of (-1, 128, 1), which relies on Keras adjusting the rank implicitly.
X_train = X_train.reshape((-1, 128, 1, 1))
X_test = X_test.reshape((-1, 128, 1, 1))

# Build the model: three Conv -> BatchNorm -> MaxPool stages, then a dense
# classifier head with one softmax output per encoded class.
model = Sequential([
    # Stage 1
    Conv2D(32, (3, 3), activation='relu', padding='same', input_shape=(128, 1, 1)),
    BatchNormalization(),
    MaxPooling2D((2, 1)),
    # Stage 2
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D((2, 1)),
    # Stage 3
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    BatchNormalization(),
    MaxPooling2D((2, 1)),
    # Classifier head
    Flatten(),
    Dense(256, activation='relu'),
    Dropout(0.5),
    Dense(len(le.classes_), activation='softmax'),
])

# Compile the model (one-hot targets, hence categorical cross-entropy)
model.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Print model summary
model.summary()


In [None]:
# Custom callback to ensure minimum number of epochs
class MinimumEpochs(Callback):
    """Keep training alive until a minimum number of epochs has completed.

    At the end of every epoch before ``min_epochs`` is reached, the callback
    resets ``model.stop_training`` to ``False``, cancelling any stop request
    that was raised earlier in that epoch.

    NOTE(review): this presumably only works when the callback is listed after
    the callbacks whose stop request it should override (it is last in the
    ``fit`` call below) -- confirm against the Keras callback ordering rules.

    Parameters
    ----------
    min_epochs : int, default 30
        Number of epochs that must finish before stopping is allowed.
    """

    def __init__(self, min_epochs=30):
        super().__init__()
        self.min_epochs = min_epochs

    def on_epoch_end(self, epoch, logs=None):
        """Clear the stop flag while fewer than ``min_epochs`` epochs are done."""
        # `epoch` is treated as a zero-based index, hence the +1.
        epochs_done = epoch + 1
        if epochs_done >= self.min_epochs:
            return
        self.model.stop_training = False

# Callbacks
min_epochs = 30
early_stopping = EarlyStopping(
    monitor='val_loss',
    patience=20,
    restore_best_weights=True,
)
reduce_lr = ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=5,
    min_lr=1e-6,
)
minimum_epochs = MinimumEpochs(min_epochs=min_epochs)

# Train the model. minimum_epochs is deliberately kept last in the list so it
# can clear a stop request raised by early_stopping in the same epoch
# (assumes callbacks run in list order -- TODO confirm).
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_test, y_test),
    epochs=100,
    batch_size=64,
    callbacks=[early_stopping, reduce_lr, minimum_epochs],
)

# Save the trained model; the inference cell reloads it from this file
model.save('bird_cnn_model.h5')

In [None]:
# Evaluate the model on the held-out split (the same arrays that were passed
# as validation_data during training)
loss, accuracy = model.evaluate(x=X_test, y=y_test)

# Report accuracy as a percentage with two decimals
accuracy_percent = 100 * accuracy
print(f'{accuracy_percent:.2f}%')

In [None]:
# Function to check if a file is an audio file
def is_audio_file(file_path):
    """Return True when librosa can read a sampling rate from ``file_path``.

    Parameters
    ----------
    file_path : str
        Path of the candidate file.

    Returns
    -------
    bool
        ``True`` if ``librosa.get_samplerate`` succeeds and returns a value,
        ``False`` if it raises (missing, unreadable or non-audio file).

    Notes
    -----
    Only ``Exception`` subclasses are treated as "not an audio file".
    ``KeyboardInterrupt`` / ``SystemExit`` propagate so the notebook can still
    be interrupted while a large directory is being scanned. The file is not
    opened separately beforehand: ``get_samplerate`` reads it itself, and the
    extra handle was never used.
    """
    try:
        return librosa.get_samplerate(file_path) is not None
    except Exception:
        # Best-effort filter: anything librosa cannot probe is simply skipped.
        return False

# Directory containing the test audio files. Written as an absolute path, like
# the training paths at the top of the notebook, so this cell does not depend
# on the kernel's current working directory.
test_dir = '/kaggle/input/birdclef-2024/test_soundscapes/'
if not os.path.exists(test_dir):
    raise FileNotFoundError(f"The directory {test_dir} does not exist.")

# List the directory in sorted order (os.listdir order is arbitrary) and keep
# only the entries that can be probed as audio.
test_files = [
    file_name
    for file_name in sorted(os.listdir(test_dir))
    if is_audio_file(os.path.join(test_dir, file_name))
]

# Function to extract features for test data
def extract_features_test(file_path, file_name):
    """Split one soundscape into consecutive 5-second windows and featurise each.

    Every full window is described exactly like a training clip in
    ``extract_features``: a 128-band mel spectrogram in dB, averaged over time,
    giving one value per mel band.

    Parameters
    ----------
    file_path : str
        Path of the audio file to load.
    file_name : str
        File name used to build row ids; its extension is stripped.

    Returns
    -------
    tuple[list, list]
        ``(X_test, row_ids)`` where ``X_test[i]`` is a ``(128, 1, 1)`` array
        for window ``i`` and ``row_ids[i]`` is ``"<stem>_<end second>"``.
        A trailing window shorter than 5 seconds is dropped.
    """
    y, sr = librosa.load(file_path, sr=None)
    duration = librosa.get_duration(y=y, sr=sr)
    time_window = 5.0
    stem = os.path.splitext(file_name)[0]
    X_test = []
    row_ids = []
    for start_time in np.arange(0, duration, time_window):
        end_time = start_time + time_window
        if end_time > duration:
            # Only complete windows are emitted.
            break
        y_segment = y[int(start_time * sr):int(end_time * sr)]
        mel_spect = librosa.feature.melspectrogram(y=y_segment, sr=sr, n_mels=128)
        mel_spect_db = librosa.power_to_db(mel_spect, ref=np.max)
        # Fix: the previous code took `mel_spect_db.T[:128, :1]`, i.e. the
        # first mel band over the first 128 frames. That is a different
        # quantity from the training feature, and it failed to reshape when a
        # window had fewer than 128 frames. Use the same time-averaged
        # 128-band vector the model was trained on.
        mel_spect_mean = np.mean(mel_spect_db.T, axis=0)
        X_test.append(mel_spect_mean.reshape(128, 1, 1))
        row_ids.append(f"{stem}_{int(end_time)}")
    return X_test, row_ids

# Featurise every test soundscape on a thread pool.
X_test_list = []
row_ids_list = []

with ThreadPoolExecutor() as executor:
    futures = [
        executor.submit(extract_features_test, os.path.join(test_dir, file_name), file_name)
        for file_name in tqdm(test_files, desc="Submitting extraction tasks")
    ]
    # Collect in submission order (not completion order) so the rows of the
    # submission file come out in the same order on every run. Features and
    # row ids are extended together, so they stay aligned.
    for future in tqdm(futures, total=len(futures), desc="Extracting features"):
        segment_features, segment_row_ids = future.result()
        X_test_list.extend(segment_features)
        row_ids_list.extend(segment_row_ids)

# NOTE: this rebinds X_test (previously the held-out split) to the soundscape
# features; re-run the split cell before re-running the evaluation cell.
X_test = np.array(X_test_list)

# Load the model saved by the training cell
model = tf.keras.models.load_model('bird_cnn_model.h5')

# Perform predictions batch by batch; with no test windows the loop body never
# runs and `predictions` stays empty.
batch_size = 32
predictions = []
for i in tqdm(range(0, len(X_test), batch_size), desc="Making predictions"):
    batch_predictions = model.predict(X_test[i:i + batch_size])
    predictions.extend(batch_predictions)

# Load the label encoder written by the training cell (a pickle produced by
# this notebook; do not point this at untrusted files).
le = joblib.load('label_encoder.pkl')

# Create the submission DataFrame. Output column k of the softmax corresponds
# to le.classes_[k], so the class labels are used as column names directly
# instead of going through placeholder names and a rename.
submission_df = pd.DataFrame(predictions, columns=list(le.classes_))
submission_df.insert(0, 'row_id', row_ids_list)

# Save the submission file
submission_path = 'submission.csv'
submission_df.to_csv(submission_path, index=False)

print("Updated submission file 'submission.csv' has been created successfully.")