# BirdCLEF+ 2025 Competition: CPU-Optimized Ensemble Model

This notebook implements a time-efficient ensemble approach for the BirdCLEF+ 2025 competition, designed to run within the 90-minute CPU runtime limit.

## Competition Requirements

- CPU Notebook ≤ 90 minutes run-time
- GPU Notebook submissions are effectively disabled (GPU notebooks are limited to 1 minute of run-time)
- Internet access disabled
- Freely & publicly available external data is allowed, including pre-trained models
- Submission file must be named submission.csv

## 1. Load Required Libraries

Import only the essential libraries to reduce startup time.

In [None]:
# Import necessary libraries
import os
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import Dataset, DataLoader
import librosa
from sklearn.preprocessing import LabelEncoder
import pickle
import hashlib
from pathlib import Path
import warnings

# NOTE(review): NO_PROXY='*' only asks proxy-aware HTTP clients to bypass any
# configured proxy for every host; it presumably does not block network access
# by itself. Internet is disabled by the competition platform - confirm whether
# this line is still needed.
os.environ['NO_PROXY'] = '*'

# Suppress warnings to reduce output clutter (this silences every warning category)
warnings.filterwarnings('ignore')

# Seed NumPy and PyTorch so random draws are repeatable across runs
np.random.seed(42)
torch.manual_seed(42)

# The device is fixed to CPU (no availability check is performed) because the
# competition only accepts CPU notebooks
device = torch.device("cpu")
print(f"Using device: {device}")

# Show pandas version for reference
print(f"Pandas version: {pd.__version__}")

## 2. Load and Explore Dataset

Efficiently load the dataset with minimal processing.

In [None]:
# Detect the Kaggle runtime once and derive every data location from that answer
is_kaggle = os.path.exists("/kaggle/input")
BASE_DIR = "/kaggle/input/birdclef-2025" if is_kaggle else "../input/birdclef-2025"
TRAIN_AUDIO_DIR, TRAIN_SOUNDSCAPES_DIR, TEST_SOUNDSCAPES_DIR = (
    os.path.join(BASE_DIR, subdir)
    for subdir in ("train_audio", "train_soundscapes", "test_soundscapes")
)

print(f"Running on Kaggle: {is_kaggle}")
print(f"Base directory: {BASE_DIR}")

# Training metadata: read the real CSV when it is present, otherwise build a
# tiny placeholder frame so the rest of the notebook can still be exercised
train_csv_path = os.path.join(BASE_DIR, "train.csv")
if not os.path.exists(train_csv_path):
    print(f"Training CSV file not found at {train_csv_path}")
    train_metadata = pd.DataFrame({
        'primary_label': ['species1', 'species2'] * 5,
        'filename': [f'dummy{i}.ogg' for i in range(10)],
        'duration': [5.0] * 10,
    })
else:
    print(f"Loading training CSV file: {train_csv_path}")
    train_metadata = pd.read_csv(train_csv_path)

print(
    f"\nDataset overview: {len(train_metadata)} samples, "
    f"{train_metadata['primary_label'].nunique()} species"
)

## 3. Efficient Audio Processing & Feature Extraction

Streamlined preprocessing and feature extraction optimized for CPU performance.

In [None]:
# Define preprocessing parameters - optimized for speed
SAMPLE_RATE = 32000  # Target sampling rate (Hz) that audio is resampled to on load
MAX_AUDIO_LENGTH = 5  # Maximum audio length in seconds
AUDIO_LENGTH_SAMPLES = MAX_AUDIO_LENGTH * SAMPLE_RATE  # Clip length in samples (5 * 32000 = 160000)
N_MELS = 128  # Number of MEL bands
N_MFCC = 20  # Reduced number of MFCCs for efficiency
HOP_LENGTH = 512  # Hop length handed to the librosa feature extractors
N_FFT = 1024  # Reduced from 2048 for speed

# Set up caching to avoid recomputing features
# The directory is created relative to the current working directory as soon as this cell runs
CACHE_DIR = Path("./feature_cache")
CACHE_DIR.mkdir(exist_ok=True)

def load_audio_file(file_path, sr=SAMPLE_RATE, duration=None):
    """Read an audio file as a mono waveform.

    Args:
        file_path: Path of the audio file handed to ``librosa.load``.
        sr: Sampling rate the audio is resampled to.
        duration: Optional limit (seconds) on how much audio is read.

    Returns:
        The waveform returned by ``librosa.load``, or ``None`` when loading
        raised any exception (the error is printed, not re-raised).
    """
    try:
        # mono=True keeps a single channel, which reduces downstream computation
        waveform, _loaded_sr = librosa.load(file_path, sr=sr, duration=duration, mono=True)
    except Exception as e:
        print(f"Error loading {file_path}: {e}")
        return None
    return waveform

def pad_or_trim(audio, target_length=AUDIO_LENGTH_SAMPLES):
    """Force a waveform to exactly ``target_length`` samples.

    Shorter input is right-padded with zeros; input that is already long
    enough is cut down to its first ``target_length`` samples.
    """
    shortfall = target_length - len(audio)
    if shortfall <= 0:
        return audio[:target_length]
    return np.pad(audio, (0, shortfall), 'constant')

def extract_features(audio, feature_type='mel'):
    """Turn a waveform into a 2-D feature matrix.

    Args:
        audio: Waveform passed to librosa as ``y``.
        feature_type: ``'mel'`` for a dB-scaled mel spectrogram (referenced to
            the clip's own maximum) or ``'mfcc'`` for MFCCs standardised with
            the clip's global mean and standard deviation.

    Returns:
        The feature array produced by librosa for the requested type.

    Raises:
        ValueError: If ``feature_type`` is neither ``'mel'`` nor ``'mfcc'``.
    """
    if feature_type not in ('mel', 'mfcc'):
        raise ValueError(f"Unknown feature type: {feature_type}")

    # Arguments shared by both librosa extractors
    stft_kwargs = dict(y=audio, sr=SAMPLE_RATE, n_fft=N_FFT, hop_length=HOP_LENGTH)

    if feature_type == 'mel':
        power_spec = librosa.feature.melspectrogram(n_mels=N_MELS, **stft_kwargs)
        # Log-scale the power values relative to the loudest bin of this clip
        return librosa.power_to_db(power_spec, ref=np.max)

    coeffs = librosa.feature.mfcc(n_mfcc=N_MFCC, **stft_kwargs)
    # Standardise; the epsilon guards against a zero standard deviation
    return (coeffs - np.mean(coeffs)) / (np.std(coeffs) + 1e-8)

def preprocess_and_extract(file_path, feature_type='mel', use_cache=True):
    """Load a clip, fix its length and extract features, with an on-disk cache.

    Args:
        file_path: Audio file to process.
        feature_type: Feature kind forwarded to ``extract_features``.
        use_cache: When true, reuse / store the result as ``.npy`` in ``CACHE_DIR``.

    Returns:
        The feature array, or ``None`` when the audio could not be loaded.
    """
    # The key covers every setting that influences the extracted array, so that
    # changing any of them cannot serve stale features from an earlier run.
    key_fields = (
        file_path, feature_type, SAMPLE_RATE, N_MELS, N_FFT,
        N_MFCC, HOP_LENGTH, MAX_AUDIO_LENGTH,
    )
    cache_key = hashlib.md5("_".join(map(str, key_fields)).encode()).hexdigest()
    cache_path = CACHE_DIR / f"{cache_key}.npy"

    # Try to load from cache first
    if use_cache and cache_path.exists():
        try:
            return np.load(cache_path)
        except Exception as e:
            # Best effort: a corrupt/partial entry is recomputed below.
            # ``Exception`` (not a bare except) lets Ctrl-C / SystemExit through.
            print(f"Ignoring unreadable cache entry {cache_path}: {e}")

    # Process audio
    audio = load_audio_file(file_path, duration=MAX_AUDIO_LENGTH)
    if audio is None:
        return None

    features = extract_features(pad_or_trim(audio), feature_type)

    # Save to cache; a failed write must never abort feature extraction
    if use_cache:
        try:
            np.save(cache_path, features)
        except Exception as e:
            print(f"Could not write cache entry {cache_path}: {e}")

    return features

# Function to batch process a subset of files
def batch_process_files(file_list, max_files=100, feature_type='mel'):
    """Extract features for the leading ``max_files`` entries of ``file_list``.

    Args:
        file_list: Sequence of ``(filename, label)`` pairs.
        max_files: Upper bound on how many leading entries are processed.
        feature_type: Feature kind forwarded to ``preprocess_and_extract``.

    Returns:
        ``(features, labels, filenames)`` - three parallel lists holding only
        the entries whose file was found and whose features were extracted.
    """
    selected = file_list[:max_files]
    total = len(selected)
    kept = []  # (feature, label, filename) for every successfully processed entry

    print(f"Processing {total} audio files...")
    for position, (name, species) in enumerate(selected, start=1):
        # Progress line for the 1st, 11th, 21st, ... file
        if position % 10 == 1:
            print(f"Processing file {position}/{total}")

        # Look directly under the audio dir first, then inside the species subfolder
        candidate_paths = (
            os.path.join(TRAIN_AUDIO_DIR, *parts)
            for parts in ((name,), (species, name))
        )
        located = next((p for p in candidate_paths if os.path.exists(p)), None)
        if located is None:
            continue

        extracted = preprocess_and_extract(located, feature_type)
        if extracted is None:
            continue
        kept.append((extracted, species, name))

    features = [entry[0] for entry in kept]
    labels = [entry[1] for entry in kept]
    filenames = [entry[2] for entry in kept]
    return features, labels, filenames

## 4. Load Pre-trained Models

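For efficiency, we'll use lightweight pre-trained models. This section first encodes the class labels and then defines the model architectures, which read their pre-trained weights from local files because internet access is disabled.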

In [None]:
# Fit the encoder on the species codes, then store the integer ids next to the metadata
label_encoder = LabelEncoder().fit(train_metadata['primary_label'])
train_metadata['label_encoded'] = label_encoder.transform(train_metadata['primary_label'])
num_classes = len(label_encoder.classes_)

print(f"Total number of classes: {num_classes}")

# Checkpoints and the pickled encoder live in one folder under the working directory
MODEL_DIR = Path("./model_checkpoints")
MODEL_DIR.mkdir(exist_ok=True)

# Persist the fitted encoder so inference can map class indices back to species
with (MODEL_DIR / "label_encoder.pkl").open('wb') as f:
    pickle.dump(label_encoder, f)

In [None]:
# Define lightweight model architectures optimized for CPU performance

# Import pre-trained model functions
from torchvision import models
import timm

# 1. Pre-trained CNN for spectrograms
class LocalPretrainedCNN(nn.Module):
    """MobileNetV2 classifier adapted to single-channel spectrogram input.

    Args:
        weights_path: Local ``state_dict`` file for torchvision's MobileNetV2.
            When the file is missing the network keeps its random initialisation.
        num_classes: Size of the replacement classification layer.
    """

    def __init__(self, weights_path, num_classes=200):
        super().__init__()

        # Initialize model without downloading weights
        self.model = models.mobilenet_v2(pretrained=False)

        # Load weights from local file
        weights_loaded = os.path.exists(weights_path)
        if weights_loaded:
            # map_location keeps checkpoints that were saved on a GPU loadable
            # in this CPU-only notebook.
            self.model.load_state_dict(torch.load(weights_path, map_location="cpu"))
            print(f"Loaded weights from {weights_path}")
        else:
            print(f"Warning: Weights file not found at {weights_path}. Using random initialization.")

        # Swap the RGB stem for a single-channel one with the same geometry
        rgb_conv = self.model.features[0][0]
        mono_conv = nn.Conv2d(
            1, rgb_conv.out_channels,
            kernel_size=rgb_conv.kernel_size,
            stride=rgb_conv.stride,
            padding=rgb_conv.padding,
            bias=rgb_conv.bias is not None
        )
        if weights_loaded:
            # Keep the pretrained filters instead of discarding them: summing
            # over the RGB axis is equivalent to feeding the spectrogram
            # replicated on all three input channels.
            with torch.no_grad():
                mono_conv.weight.copy_(rgb_conv.weight.sum(dim=1, keepdim=True))
                if rgb_conv.bias is not None:
                    mono_conv.bias.copy_(rgb_conv.bias)
        self.model.features[0][0] = mono_conv

        # Replace classifier with a new one for our number of classes
        in_features = self.model.classifier[1].in_features
        self.model.classifier[1] = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return class logits for ``[batch, freq, time]`` or ``[batch, 1, freq, time]`` input."""
        if x.dim() == 3:  # [batch, freq, time]
            x = x.unsqueeze(1)  # [batch, channel, freq, time]

        # Upsample inputs that are smaller than 32 pixels on either axis
        if x.size(2) < 32 or x.size(3) < 32:
            x = F.interpolate(x, size=(32, 32), mode='bilinear', align_corners=False)

        return self.model(x)

# 2. Lightweight CRNN with pre-trained CNN backbone
class LocalPretrainedCRNN(nn.Module):
    """ResNet18 feature extractor followed by a bidirectional GRU over time.

    The convolutional stages of the backbone produce a feature map whose last
    axis follows the time axis of the spectrogram; that map is averaged over
    frequency and its columns are fed to the GRU as a sequence.

    Args:
        weights_path: Local ``state_dict`` file for torchvision's ResNet18.
            When the file is missing the backbone keeps its random initialisation.
        num_classes: Number of output logits.
    """

    def __init__(self, weights_path, num_classes=200):
        super().__init__()

        # Initialize ResNet18 without downloading weights
        self.backbone = models.resnet18(pretrained=False)

        # Load weights from local file
        weights_loaded = os.path.exists(weights_path)
        if weights_loaded:
            # map_location keeps checkpoints that were saved on a GPU loadable
            # in this CPU-only notebook.
            self.backbone.load_state_dict(torch.load(weights_path, map_location="cpu"))
            print(f"Loaded weights from {weights_path}")
        else:
            print(f"Warning: Weights file not found at {weights_path}. Using random initialization.")

        # Modify first layer to accept single channel input
        rgb_conv = self.backbone.conv1
        mono_conv = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)
        if weights_loaded:
            # Keep the pretrained stem: summing over the RGB axis matches
            # feeding the spectrogram replicated on all three channels.
            with torch.no_grad():
                mono_conv.weight.copy_(rgb_conv.weight.sum(dim=1, keepdim=True))
        self.backbone.conv1 = mono_conv

        # Get feature dimension (channel count of the last conv stage)
        feature_dim = self.backbone.fc.in_features

        # Remove fully connected layer
        self.backbone.fc = nn.Identity()

        # RNN layer
        self.gru = nn.GRU(
            input_size=feature_dim,
            hidden_size=128,
            batch_first=True,
            bidirectional=True
        )

        # Classifier
        self.classifier = nn.Linear(256, num_classes)  # 128*2 (bidirectional)

    def forward(self, x):
        """Return class logits for ``[batch, freq, time]`` or ``[batch, 1, freq, time]`` input."""
        if x.dim() == 3:  # [batch, freq, time]
            x = x.unsqueeze(1)  # [batch, channel, freq, time]

        # Ensure minimum size for backbone
        if x.size(2) < 32 or x.size(3) < 32:
            x = F.interpolate(x, size=(224, 224), mode='bilinear', align_corners=False)

        # Run only the convolutional stages so the time axis survives; calling
        # self.backbone(x) would globally pool it away and leave the GRU with a
        # sequence of identical vectors.
        net = self.backbone
        fmap = net.maxpool(net.relu(net.bn1(net.conv1(x))))
        fmap = net.layer4(net.layer3(net.layer2(net.layer1(fmap))))  # [batch, feature_dim, freq', time']

        # Average over frequency and put time first: [batch, time', feature_dim]
        sequence = fmap.mean(dim=2).permute(0, 2, 1)

        # Apply RNN
        output, _ = self.gru(sequence)  # [batch, time', 2*hidden_size]

        # Average over time: the last step alone would give the backward
        # direction a view of just one frame.
        output = output.mean(dim=1)

        # Classification
        return self.classifier(output)

# 3. MLP for MFCC features - this is kept simple and trained from scratch
class LightweightMLP(nn.Module):
    """Two-layer perceptron: flatten -> Linear(256) -> ReLU -> Dropout(0.5) -> Linear.

    Args:
        input_features: Length of each flattened input vector.
        num_classes: Number of output logits.
    """

    def __init__(self, input_features=500, num_classes=200):
        super().__init__()
        hidden_units = 256

        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(input_features, hidden_units)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(hidden_units, num_classes)

    def forward(self, x):
        """Return logits; inputs with more than two dimensions are flattened per sample."""
        flat = self.flatten(x) if x.dim() > 2 else x
        hidden = self.dropout(self.relu(self.fc1(flat)))
        return self.fc2(hidden)

# 4. Pre-trained EfficientNet model for improved accuracy
class LocalPretrainedEfficientNet(nn.Module):
    """EfficientNet-B0 classifier adapted to single-channel spectrogram input.

    Args:
        weights_path: Local ``state_dict`` file for torchvision's EfficientNet-B0.
            When the file is missing the network keeps its random initialisation.
        num_classes: Size of the replacement classification layer.
    """

    def __init__(self, weights_path, num_classes=200):
        super().__init__()

        # Initialize EfficientNet-B0 without downloading weights
        self.model = models.efficientnet_b0(pretrained=False)

        # Load weights from local file
        weights_loaded = os.path.exists(weights_path)
        if weights_loaded:
            # map_location keeps checkpoints that were saved on a GPU loadable
            # in this CPU-only notebook.
            self.model.load_state_dict(torch.load(weights_path, map_location="cpu"))
            print(f"Loaded weights from {weights_path}")
        else:
            print(f"Warning: Weights file not found at {weights_path}. Using random initialization.")

        # Swap the RGB stem for a single-channel one. The geometry is read from
        # the existing layer (as the MobileNet wrapper does) rather than
        # hard-coded.
        rgb_conv = self.model.features[0][0]
        mono_conv = nn.Conv2d(
            1, rgb_conv.out_channels,
            kernel_size=rgb_conv.kernel_size,
            stride=rgb_conv.stride,
            padding=rgb_conv.padding,
            bias=rgb_conv.bias is not None
        )
        if weights_loaded:
            # Keep the pretrained filters: summing over the RGB axis matches
            # feeding the spectrogram replicated on all three channels.
            with torch.no_grad():
                mono_conv.weight.copy_(rgb_conv.weight.sum(dim=1, keepdim=True))
                if rgb_conv.bias is not None:
                    mono_conv.bias.copy_(rgb_conv.bias)
        self.model.features[0][0] = mono_conv

        # Replace classifier with a new one for our number of classes
        in_features = self.model.classifier[1].in_features
        self.model.classifier[1] = nn.Linear(in_features, num_classes)

    def forward(self, x):
        """Return class logits for ``[batch, freq, time]`` or ``[batch, 1, freq, time]`` input."""
        if x.dim() == 3:  # [batch, freq, time]
            x = x.unsqueeze(1)  # [batch, channel, freq, time]

        # Upsample inputs that are smaller than 32 pixels on either axis
        if x.size(2) < 32 or x.size(3) < 32:
            x = F.interpolate(x, size=(224, 224), mode='bilinear', align_corners=False)

        return self.model(x)

## 5. CPU-Efficient Training Strategy

We'll use a time-efficient approach that combines cached features and pre-trained models.

In [None]:
class BirdDataset(Dataset):
    """Index-aligned pairing of feature arrays with their labels.

    Each item is ``(feature, label)``: the feature is converted to a float
    tensor on access, the label is returned exactly as stored.
    """

    def __init__(self, features, labels):
        self.features, self.labels = features, labels

    def __len__(self):
        # The dataset size is dictated by the feature container
        return len(self.features)

    def __getitem__(self, idx):
        return torch.FloatTensor(self.features[idx]), self.labels[idx]

def train_model_efficiently(model, features, labels, val_split=0.2, batch_size=32, epochs=5, model_name="model"):
    """Train ``model`` with Adam + cross-entropy and validation-loss early stopping.

    Args:
        model: Network mapping a feature batch to class logits.
        features: Sequence of per-sample feature arrays.
        labels: Sequence of class names understood by the global ``label_encoder``.
        val_split: Fraction of the samples held out for validation.
        batch_size: Mini-batch size for both loaders.
        epochs: Maximum number of epochs.
        model_name: Prefix of the checkpoint file written to ``MODEL_DIR``.

    Returns:
        The model with the best-validation-loss weights restored. If no
        checkpoint was written the last-epoch weights are kept, and with no
        samples at all the model is returned untouched.
    """
    n_samples = len(features)
    if n_samples == 0:
        # An empty DataLoader would raise; there is nothing to learn from anyway
        print(f"No training samples for {model_name}; returning the model untrained")
        return model

    # Encode labels
    encoded_labels = np.asarray(label_encoder.transform(labels))

    # Shuffle before splitting: the caller may pass samples grouped by class,
    # in which case a plain head/tail split would validate on classes that
    # never appear in the training part. A local generator keeps the split
    # reproducible without touching the global NumPy random state.
    order = np.random.default_rng(42).permutation(n_samples)
    split_idx = max(1, int(n_samples * (1 - val_split)))
    train_idx, val_idx = order[:split_idx], order[split_idx:]
    if len(val_idx) == 0:
        print("Validation split is empty; monitoring the loss on the training samples instead")
        val_idx = train_idx

    # Create datasets
    train_dataset = BirdDataset([features[i] for i in train_idx], encoded_labels[train_idx])
    val_dataset = BirdDataset([features[i] for i in val_idx], encoded_labels[val_idx])

    # Create dataloaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size)

    # Optimizer and criterion
    optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
    criterion = nn.CrossEntropyLoss()

    # Training loop with early stopping
    checkpoint_path = MODEL_DIR / f"{model_name}_best.pt"
    checkpoint_written = False
    best_loss = float('inf')
    patience = 2
    patience_counter = 0

    for epoch in range(epochs):
        # Train
        model.train()
        for batch_x, batch_y in train_loader:
            optimizer.zero_grad()
            loss = criterion(model(batch_x), batch_y.long())
            loss.backward()
            optimizer.step()

        # Validate
        model.eval()
        loss_sum = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for batch_x, batch_y in val_loader:
                batch_y = batch_y.long()
                outputs = model(batch_x)
                # Weight by batch size so a short final batch is not over-counted
                loss_sum += criterion(outputs, batch_y).item() * batch_y.size(0)
                _, predicted = outputs.max(1)
                total += batch_y.size(0)
                correct += predicted.eq(batch_y).sum().item()

        val_loss = loss_sum / total
        accuracy = correct / total

        print(f"Epoch {epoch+1}/{epochs}, Val Loss: {val_loss:.4f}, Accuracy: {accuracy:.4f}")

        # Check early stopping
        if val_loss < best_loss:
            best_loss = val_loss
            patience_counter = 0
            # Save model
            torch.save(model.state_dict(), checkpoint_path)
            checkpoint_written = True
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping after {epoch+1} epochs")
                break

    # Load best model. If the loss never improved (e.g. it was NaN throughout)
    # no checkpoint exists from this run, so keep the current weights.
    if checkpoint_written:
        model.load_state_dict(torch.load(checkpoint_path, map_location="cpu"))
    else:
        print(f"No checkpoint was written for {model_name}; keeping the last-epoch weights")
    return model

## 6. Time-Efficient Ensemble Approach

We'll use a streamlined ensemble method to maximize accuracy without exceeding the runtime limit.

In [None]:
def efficient_ensemble(models, weights=None):
    """Build a function that returns the weighted sum of the models' softmax outputs.

    Args:
        models: Non-empty mapping ``name -> model``.
        weights: Optional mapping ``name -> weight``; defaults to equal weights
            ``1 / len(models)``. Weights are applied as given (not re-normalised).

    Returns:
        ``predict(x)``. ``x`` is either a single tensor given to every model
        or a dict ``name -> tensor`` supplying a dedicated input per model
        (for members trained on different feature types).

    Raises:
        ValueError: If ``models`` is empty.
        KeyError: If ``weights`` lacks an entry for one of the models.
    """
    if not models:
        raise ValueError("efficient_ensemble requires at least one model")

    if weights is None:
        weights = {name: 1 / len(models) for name in models}
    else:
        # Fail at construction time instead of in the middle of inference
        missing = [name for name in models if name not in weights]
        if missing:
            raise KeyError(f"Missing ensemble weights for: {missing}")

    def predict(x):
        ensemble_probs = None
        for name, model in models.items():
            model_input = x[name] if isinstance(x, dict) else x
            model.eval()
            with torch.no_grad():
                probs = F.softmax(model(model_input), dim=1)
            weighted = weights[name] * probs
            # Sum weighted probabilities
            ensemble_probs = weighted if ensemble_probs is None else ensemble_probs + weighted
        return ensemble_probs

    return predict

## 7. Main Training and Prediction Pipeline

The main execution pipeline designed to complete within 90 minutes.

In [None]:
def _resolve_weight_paths():
    """Return the (MobileNetV2, ResNet18, EfficientNet-B0) weight file paths for this runtime."""
    if os.path.exists("/kaggle/input"):
        weights_dir = "/kaggle/input/birdclef-pretrained-weights"
        print(f"Using pre-trained weights from Kaggle dataset")
    else:
        # Local paths for testing
        weights_dir = "./pretrained_weights"
        print(f"Using pre-trained weights from local paths")
    return tuple(
        f"{weights_dir}/{name}"
        for name in ("mobilenet_v2_weights.pth", "resnet18_weights.pth", "efficientnet_b0_weights.pth")
    )


def _select_balanced_subset(samples, max_samples):
    """Cap ``samples`` at ``max_samples`` by taking the same number of leading files per class."""
    if len(samples) <= max_samples:
        return samples

    by_class = {}
    for filename, label in samples:
        by_class.setdefault(label, []).append((filename, label))

    per_class = max(1, max_samples // len(by_class))
    subset = [item for files in by_class.values() for item in files[:per_class]]
    # With more classes than max_samples, one file per class still overshoots
    return subset[:max_samples]


def _window_features(audio):
    """Cut a waveform into consecutive MAX_AUDIO_LENGTH-second windows.

    Returns ``(mel_batch, mfcc_batch)`` float tensors with one row per window.
    The window count is rounded to the nearest whole window (minimum one), so
    a few stray samples neither add nor drop a row; a short final window is
    zero-padded by ``pad_or_trim``.
    """
    n_windows = max(1, round(len(audio) / AUDIO_LENGTH_SAMPLES))
    mels, mfccs = [], []
    for w in range(n_windows):
        chunk = pad_or_trim(audio[w * AUDIO_LENGTH_SAMPLES:(w + 1) * AUDIO_LENGTH_SAMPLES])
        mels.append(extract_features(chunk, 'mel'))
        mfccs.append(extract_features(chunk, 'mfcc'))
    return torch.FloatTensor(np.stack(mels)), torch.FloatTensor(np.stack(mfccs))


def main_pipeline(max_training_samples=1000, max_test_files=None):
    """Train the ensemble on a balanced subset and write ``submission.csv``.

    Args:
        max_training_samples: Upper bound on the number of training clips.
        max_test_files: Optional cap on the number of test soundscapes.

    Returns:
        The submission DataFrame that was written: one row per
        ``MAX_AUDIO_LENGTH``-second window, ``row_id`` =
        ``<soundscape file stem>_<window end second>``, and one probability
        column per class of ``label_encoder``.
        NOTE(review): this layout is meant to mirror the competition's
        ``sample_submission.csv`` - confirm against the file shipped with the data.
    """
    print("Starting BirdCLEF 2025 prediction pipeline...")

    mobilenet_weights_path, resnet_weights_path, efficientnet_weights_path = _resolve_weight_paths()
    print(f"MobileNet weights path: {mobilenet_weights_path}")
    print(f"ResNet weights path: {resnet_weights_path}")
    print(f"EfficientNet weights path: {efficientnet_weights_path}")

    # 1. Prepare training data
    print("\n[1/5] Preparing training data...")
    all_samples = list(zip(train_metadata['filename'], train_metadata['primary_label']))
    train_data_subset = _select_balanced_subset(all_samples, max_training_samples)
    print(f"Processing {len(train_data_subset)} training samples...")

    # 2. Extract features
    print("\n[2/5] Extracting features...")
    # max_files must be passed explicitly: the helper's default of 100 would
    # otherwise silently discard most of the selected subset.
    n_selected = len(train_data_subset)
    mel_features, mel_labels, _ = batch_process_files(
        train_data_subset, max_files=n_selected, feature_type='mel')
    mfcc_features, mfcc_labels, _ = batch_process_files(
        train_data_subset, max_files=n_selected, feature_type='mfcc')

    # 3. Train models
    print("\n[3/5] Training models (with local pre-trained weights)...")

    # The MLP input size must equal the flattened MFCC matrix of one window
    if mfcc_features:
        mfcc_flattened_size = int(np.prod(mfcc_features[0].shape))
    else:
        print("No MFCC features available, sizing the MLP from a silent window")
        silent_window = np.zeros(AUDIO_LENGTH_SAMPLES, dtype=np.float32)
        mfcc_flattened_size = int(np.prod(extract_features(silent_window, 'mfcc').shape))

    # Initialize models with local pre-trained weights
    ensemble_models = {
        'cnn': LocalPretrainedCNN(weights_path=mobilenet_weights_path, num_classes=num_classes),
        'crnn': LocalPretrainedCRNN(weights_path=resnet_weights_path, num_classes=num_classes),
        'efficientnet': LocalPretrainedEfficientNet(
            weights_path=efficientnet_weights_path, num_classes=num_classes),
        'mlp': LightweightMLP(input_features=mfcc_flattened_size, num_classes=num_classes),
    }

    # For pre-trained models, we can use fewer epochs for fine-tuning
    training_plan = (
        ("Fine-tuning pre-trained CNN on mel spectrograms...",
         'cnn', mel_features, mel_labels, 2, "pretrained_cnn"),
        ("Fine-tuning pre-trained CRNN on mel spectrograms...",
         'crnn', mel_features, mel_labels, 2, "pretrained_crnn"),
        ("Fine-tuning pre-trained EfficientNet on mel spectrograms...",
         'efficientnet', mel_features, mel_labels, 2, "pretrained_efficientnet"),
        ("Training MLP on MFCCs...",
         'mlp', mfcc_features, mfcc_labels, 3, "mlp"),
    )
    for message, key, feats, labs, n_epochs, checkpoint_name in training_plan:
        if not feats:
            # Happens when no training audio could be found or decoded
            print(f"No training features for '{key}'; leaving it untrained")
            continue
        print(message)
        ensemble_models[key] = train_model_efficiently(
            ensemble_models[key], feats, labs,
            epochs=n_epochs, batch_size=32, model_name=checkpoint_name
        )

    # 4. Create ensemble
    print("\n[4/5] Creating ensemble model...")

    # Weighting for pre-trained models (giving more weight to pre-trained ones)
    ensemble_weights = {
        'cnn': 0.30,         # Pre-trained CNN (MobileNetV2)
        'crnn': 0.25,        # Pre-trained CRNN with temporal patterns (ResNet18)
        'efficientnet': 0.35,# Pre-trained EfficientNet (better performance)
        'mlp': 0.10          # MLP is simpler but adds diversity
    }

    # The MLP was trained on MFCCs and the other members on mel spectrograms,
    # so each group gets its own input; the weighted outputs are then added.
    mel_members = ('cnn', 'crnn', 'efficientnet')
    mel_predict = efficient_ensemble(
        {name: ensemble_models[name] for name in mel_members},
        {name: ensemble_weights[name] for name in mel_members},
    )
    mfcc_predict = efficient_ensemble(
        {'mlp': ensemble_models['mlp']},
        {'mlp': ensemble_weights['mlp']},
    )

    def ensemble_predict(mel_batch, mfcc_batch):
        return mel_predict(mel_batch) + mfcc_predict(mfcc_batch)

    # 5. Generate predictions on test data
    print("\n[5/5] Generating predictions on test data...")

    # Find test files
    if os.path.exists(TEST_SOUNDSCAPES_DIR):
        # Sorted so the row order does not depend on directory listing order
        test_files = [
            os.path.join(TEST_SOUNDSCAPES_DIR, name)
            for name in sorted(os.listdir(TEST_SOUNDSCAPES_DIR))
            if name.endswith(('.ogg', '.wav'))
        ]
        if max_test_files:
            test_files = test_files[:max_test_files]
        print(f"Found {len(test_files)} test files")
    else:
        print(f"Test directory not found: {TEST_SOUNDSCAPES_DIR}")
        print("Creating mock test data for demonstration")
        # Create mock test data
        test_files = [f"mock_test_{i}.ogg" for i in range(10)]

    # Process test files
    print("Processing test files...")
    species_columns = list(label_encoder.classes_)
    row_ids = []
    prob_blocks = []  # one [n_windows, num_classes] array per test file

    for test_file in test_files:
        # Get file ID (basename without extension)
        file_id = os.path.splitext(os.path.basename(test_file))[0]

        if os.path.exists(test_file):
            audio = load_audio_file(test_file)  # whole recording, not just the first window
            if audio is None:
                print(f"Skipping unreadable test file: {test_file}")
                continue
            mel_batch, mfcc_batch = _window_features(audio)
            window_probs = ensemble_predict(mel_batch, mfcc_batch).numpy()
        else:
            # For demonstration, create random predictions for a single window
            print(f"Test file not found, using mock predictions: {test_file}")
            window_probs = np.random.random((1, num_classes))
            window_probs = window_probs / window_probs.sum(axis=1, keepdims=True)

        prob_blocks.append(window_probs)
        row_ids.extend(
            f"{file_id}_{(w + 1) * MAX_AUDIO_LENGTH}" for w in range(len(window_probs))
        )

    # Create submission file
    print("Creating submission file...")
    if prob_blocks:
        submission_df = pd.DataFrame(np.vstack(prob_blocks), columns=species_columns)
        submission_df.insert(0, "row_id", row_ids)
    else:
        # Still emit the header so an empty test folder yields a well-formed file
        submission_df = pd.DataFrame(columns=["row_id"] + species_columns)

    # Save submission file
    submission_path = "submission.csv"
    submission_df.to_csv(submission_path, index=False)

    print(f"\nSubmission file created at {submission_path} with {len(submission_df)} rows")
    print("Sample of submission file:")
    print(submission_df.head())

    return submission_df

In [None]:
# Execute the full pipeline
# Adjust max_training_samples based on available time
# (it caps how many training clips are selected; the function's own default is 1000)
# The returned DataFrame is kept in `submission`; the pipeline also writes submission.csv
submission = main_pipeline(max_training_samples=500)

## Conclusion

This notebook implements a CPU-optimized ensemble approach for the BirdCLEF+ 2025 competition, carefully designed to run within the 90-minute runtime limit. The key optimizations include:

1. Streamlined preprocessing and feature extraction with caching
2. Using pre-trained models (MobileNetV2, ResNet18, and EfficientNet-B0) for better starting performance
3. Balanced dataset sampling to improve accuracy while reducing computation time
4. Efficient ensemble prediction with minimal overhead
5. Early stopping to avoid unnecessary training iterations

By leveraging transfer learning from pre-trained models while maintaining efficient processing, this solution provides an excellent balance of accuracy and speed for the competition constraints.