In [92]:
import os
import sys
import logging
import numpy as np
import torch
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn
from sklearn.model_selection import train_test_split
import torch.optim as optim
import pickle
import pandas as pd
from tqdm import tqdm
import json
from datetime import datetime
from collections import Counter

In [81]:
# Download YAMNet model
if not os.path.exists('yamnet.tflite'):
    print("Downloading YAMNet model...")
    import urllib.request
    urllib.request.urlretrieve('https://storage.googleapis.com/mediapipe-models/audio_classifier/yamnet/float32/1/yamnet.tflite', 'yamnet.tflite')


In [82]:
# Load the mapping
features_dir = "yamnet_embeddings"
with open(os.path.join(features_dir, "mapping.pkl"), "rb") as f:
    mapping = pickle.load(f)


In [83]:
# Define paths
base_dir = "/data/birdclef/birdclef-2025"
train_audio_dir = os.path.join(base_dir, "train_audio")
train_csv_path = os.path.join(base_dir, "train.csv")
taxonomy_path = os.path.join(base_dir, "taxonomy.csv")

# Create results directory
results_dir = 'training_results/yamnet_training_results'
os.makedirs(results_dir, exist_ok=True)

# Load metadata
train_df = pd.read_csv(train_csv_path)
taxonomy_df = pd.read_csv(taxonomy_path)

In [84]:
# Create lists of files and labels
feature_files = []
labels = []

# Create a mapping from primary_label to class index
unique_labels = train_df['primary_label'].unique()
label_to_idx = {label: idx for idx, label in enumerate(unique_labels)}
idx_to_label = {idx: label for label, idx in label_to_idx.items()}

print(f"Total number of species: {len(unique_labels)}")

Total number of species: 206


In [85]:
for feature_file, info in mapping.items():
    feature_files.append(os.path.join(features_dir, feature_file))
    labels.append(label_to_idx[info["label"]])

# Create a dataset that loads pre-extracted features
class PrecomputedFeaturesDataset(Dataset):
    def __init__(self, feature_files, labels):
        self.feature_files = feature_files
        self.labels = labels
        
    def __len__(self):
        return len(self.feature_files)
    
    def __getitem__(self, idx):
        # Load the pre-extracted feature
        feature = np.load(self.feature_files[idx])
        label = self.labels[idx]
        
        return torch.tensor(feature, dtype=torch.float32), torch.tensor(label, dtype=torch.long)

In [86]:
# Split into train/val and create datasets
# (code for splitting data)
train_feature_files, val_feature_files, train_labels, val_labels = train_test_split(
    feature_files, labels, test_size=0.2, random_state=42
)

print(f"Training samples: {len(train_feature_files)}")
print(f"Validation samples: {len(val_feature_files)}")


# Create datasets
train_files = PrecomputedFeaturesDataset(train_feature_files, train_labels)
val_files = PrecomputedFeaturesDataset(val_feature_files, val_labels)

# Create dataloaders and proceed with training as usual

Training samples: 22851
Validation samples: 5713


In [87]:
class BirdCLEFDataset(Dataset):
    def __init__(self, file_paths, labels, cache_dir='yamnet_features_cache'):
        self.file_paths = file_paths
        self.labels = labels
        self.cache_dir = cache_dir
        
    def __len__(self):
        return len(self.file_paths)
    
    def __getitem__(self, idx):
        audio_path = self.file_paths[idx]
        label = self.labels[idx]
        
        # Create cache filename
        cache_filename = str(hash(audio_path)) + '.npy'
        cache_path = os.path.join(self.cache_dir, cache_filename)
        
        # Load cached features if available
        if os.path.exists(cache_path):
            features = np.load(cache_path)
        else:
            # Fall back to extraction if not cached
            features = extract_yamnet_embeddings(audio_path)
            
            # Save for future use
            os.makedirs(self.cache_dir, exist_ok=True)
            np.save(cache_path, features)
        
        return torch.tensor(features, dtype=torch.float32), torch.tensor(label, dtype=torch.long)

In [88]:
# Create datasets and dataloaders
train_dataset = BirdCLEFDataset(train_files, train_labels)
val_dataset = BirdCLEFDataset(val_files, val_labels)


In [89]:
# Create a simple classifier model
class BirdCLEFClassifier(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(BirdCLEFClassifier, self).__init__()
        self.fc1 = nn.Linear(input_dim, 256)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)
        self.fc2 = nn.Linear(256, output_dim)
        
    def forward(self, x):
        x = self.fc1(x)
        x = self.relu(x)
        x = self.dropout(x)
        x = self.fc2(x)
        return x

In [93]:
# Training loop
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False)

# Initialize model, loss function, and optimizer
input_dim = 1024  # YAMNet has 521 classes
output_dim = len(unique_labels)

# model = BirdCLEFClassifier(input_dim=1024, hidden_dim=512, output_dim=len(unique_labels))
model = BirdCLEFClassifier(input_dim=1024, output_dim=len(unique_labels))

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)  # Higher learning rate

# Initialize results tracking
results = {
    'epochs': [],
    'train_loss': [],
    'val_loss': [],
    'accuracy': [],
    'timestamp': []
}

num_epochs = 20  # Increase epochs
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


Using device: cuda


In [94]:
# Training loop

model.to(device)

print("\nStarting training...")
for epoch in range(num_epochs):
    model.train()
    train_loss = 0.0
    
    # Training loop with less verbose progress bar
    for batch_idx, (features, labels) in enumerate(tqdm(train_loader, 
                                                       desc=f"Epoch {epoch+1}/{num_epochs}",
                                                       leave=False, ncols=80)):
        features, labels = features.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(features)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        train_loss += loss.item()
        
        # Print diagnostic info for first few batches
        if epoch == 0 and batch_idx < 3:
            print(f"\nDiagnostic - Batch {batch_idx}:")
            print(f"Features stats: min={features.min().item():.4f}, max={features.max().item():.4f}")
            print(f"Outputs: {outputs[0][:5].tolist()}")  # First 5 logits of first sample
            print(f"Label counts in batch: {torch.bincount(labels, minlength=5)[:5]}")
    
    # Validation
    model.eval()
    val_loss = 0.0
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for features, labels in val_loader:
            features, labels = features.to(device), labels.to(device)
            outputs = model(features)
            loss = criterion(outputs, labels)
            val_loss += loss.item()
            
            # Get predicted class
            probs = torch.softmax(outputs, dim=1)
            _, preds = torch.max(probs, 1)
            
            all_preds.extend(preds.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    # Calculate metrics
    train_loss /= len(train_loader)
    val_loss /= len(val_loader)
    
    # Direct calculation of accuracy
    correct = sum(1 for x, y in zip(all_preds, all_labels) if x == y)
    total = len(all_labels)
    accuracy = correct / total if total > 0 else 0
    
    # Store results
    results['epochs'].append(epoch + 1)
    results['train_loss'].append(float(train_loss))
    results['val_loss'].append(float(val_loss))
    results['accuracy'].append(float(accuracy))
    results['timestamp'].append(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
    
    # Print progress
    print(f"Epoch {epoch+1}/{num_epochs}: Train Loss={train_loss:.4f}, Val Loss={val_loss:.4f}, Accuracy={accuracy:.4f}, Correct={correct}/{total}")
    
    # Print prediction distribution
    pred_counts = Counter(all_preds)
    print(f"Prediction distribution: {len(pred_counts)} classes predicted")
    if len(pred_counts) < 5:
        print("WARNING: Model is only predicting a few classes!")
        for pred, count in pred_counts.most_common():
            print(f"  Class {idx_to_label[pred]}: {count} predictions ({count/len(all_preds)*100:.1f}%)")
    
    # Save results every 5 epochs
    if (epoch + 1) % 5 == 0 or epoch == num_epochs - 1:
        # Save results to JSON
        results_file = os.path.join(results_dir, f'results_epoch_{epoch+1}.json')
        with open(results_file, 'w') as f:
            json.dump(results, f, indent=4)
        
        # Save model checkpoint
        model_file = os.path.join(results_dir, f'model_epoch_{epoch+1}.pt')
        torch.save({
            'epoch': epoch + 1,
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
            'train_loss': train_loss,
            'val_loss': val_loss,
            'accuracy': accuracy
        }, model_file)
        
        print(f"Saved results and model checkpoint at epoch {epoch+1}")

print("Training complete!")
print(f"Processed {processed_files} files, {successful_files} successful ({successful_files/processed_files*100:.1f}%)")

# Print error summary
print("\nError Summary:")
for error_type, files in error_files.items():
    print(f"{error_type}: {len(files)} files. Examples: {', '.join(files[:3])}")



Starting training...


Epoch 1/20:   0%|                                      | 0/1429 [00:00<?, ?it/s]

                                                                                

NameError: name 'extract_yamnet_embeddings' is not defined