In [1]:
# System and utilities
import os
from pathlib import Path
import warnings
warnings.filterwarnings('ignore')

# Data handling and processing
import numpy as np
import pandas as pd
from tqdm import tqdm
import matplotlib.pyplot as plt

# Audio processing
import librosa

# PyTorch imports
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from torchvision import models

# Local imports
from preprocess_birdnet import AudioPreprocessor, create_dataloaders

In [None]:
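# BirdNET-Analyzer must be installed once before the rest of this notebook is run.
# Uncomment the line below; %pip (rather than !pip) installs into the running kernel's environment.
# %pip install git+https://github.com/kahst/BirdNET-Analyzer.git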

In [2]:
# Report the installed PyTorch build and whether a CUDA device is usable.
print(
    f"PyTorch version: {torch.__version__}",
    f"CUDA available: {torch.cuda.is_available()}",
    sep="\n",
)

PyTorch version: 2.5.1+cpu
CUDA available: False


In [5]:
# Read preprocessed_data.csv into a DataFrame so its contents can be inspected below.
preprocessed_data = pd.read_csv('preprocessed_data.csv')

In [6]:
# Print column names, non-null counts and dtypes as a quick sanity check of the table.
preprocessed_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 692 entries, 0 to 691
Data columns (total 10 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   genus            692 non-null    object 
 1   species          692 non-null    object 
 2   latitude         692 non-null    float64
 3   longitude        692 non-null    float64
 4   quality          692 non-null    object 
 5   file_name        692 non-null    object 
 6   simplified_type  692 non-null    object 
 7   season           692 non-null    object 
 8   time_of_day      692 non-null    object 
 9   length_seconds   692 non-null    int64  
dtypes: float64(2), int64(1), object(7)
memory usage: 54.2+ KB


In [None]:
# Run preprocessing if needed
# Preprocess the raw recordings only when the processed metadata file is absent,
# so re-running the notebook does not repeat the expensive step.
if not Path('Processed_BirdNET/metadata.csv').exists():
    preprocessor = AudioPreprocessor(
        source_dir='Original Recordings',
        output_dir='Processed_BirdNET',
    )
    metadata = preprocessor.process_dataset()

# Build the train / validation / test loaders.
# NOTE(review): the guard above checks 'Processed_BirdNET/metadata.csv' while the
# loaders are built from 'preprocessed_data.csv' — confirm this is the intended file.
train_loader, val_loader, test_loader = create_dataloaders(
    'preprocessed_data.csv', batch_size=32
)

In [None]:
def visualize_spectrogram(loader):
    """Plot the first spectrogram of the first batch produced by ``loader``.

    Parameters
    ----------
    loader : iterable
        Yields ``(batch, labels)`` pairs. ``batch[0]`` must support
        ``.squeeze().numpy()`` (assumed to be a CPU torch tensor — TODO confirm).

    Returns
    -------
    None
        The figure is shown, and the array's shape and value range are printed.
    """
    # Only the first batch is consumed; the labels are not used.
    first_batch, _ = next(iter(loader))
    spec = first_batch[0].squeeze().numpy()

    fig, ax = plt.subplots(figsize=(10, 4))
    image = ax.imshow(spec, aspect='auto', origin='lower')
    # NOTE(review): the colorbar is labelled in dB; this presumes the loader
    # yields dB-scaled values — verify against the dataset's preprocessing.
    fig.colorbar(image, ax=ax, format='%+2.0f dB')
    ax.set_title(f'Mel Spectrogram (shape: {spec.shape})')
    plt.show()

    lowest, highest = spec.min(), spec.max()
    print(f"Spectrogram shape: {spec.shape}")
    print(f"Value range: [{lowest:.2f}, {highest:.2f}]")

# Sanity-check the training data by plotting one spectrogram from its first batch.
visualize_spectrogram(loader=train_loader)

In [None]:
def load_pretrained_birdnet(model_path="path_to_birdnet_savedmodel"):
    """
    Load the pretrained BirdNET TensorFlow SavedModel.

    Conversion to PyTorch is done by ``BirdNETFineTune``, which receives the
    object returned here as its ``original_model`` argument.

    Args:
        model_path: Directory of the BirdNET SavedModel. The default is a
            placeholder and must be replaced with a real path.

    Returns:
        The object returned by ``tf.saved_model.load(model_path)``.

    Raises:
        ImportError: If TensorFlow is not installed.
    """
    # Imported lazily: TensorFlow is needed only to read the original weights,
    # and the notebook's import cell does not provide `tf`.
    import tensorflow as tf

    return tf.saved_model.load(model_path)


class BirdNETFineTune(nn.Module):
    """
    BirdNET feature extractor followed by a new classification head.

    Defined at module level (not inside ``load_pretrained_birdnet``) so that
    other cells can instantiate it.

    Args:
        original_model: The loaded BirdNET model to convert.
        num_classes: Number of output classes for the new head.
    """

    def __init__(self, original_model, num_classes):
        super().__init__()

        # Copy the architecture and weights from the original model.
        # NOTE(review): `convert_tf_to_pytorch` is not defined in this notebook;
        # it must be defined or imported before this class is instantiated.
        self.features = convert_tf_to_pytorch(original_model)

        # Assumes `features` is indexable (e.g. nn.Sequential) and that its last
        # layer exposes `out_features` (e.g. nn.Linear) — TODO confirm.
        num_features = self.features[-1].out_features

        # New classification head sized for the target number of classes.
        self.classifier = nn.Sequential(
            nn.Linear(num_features, 512),
            nn.ReLU(),
            nn.Dropout(0.5),
            nn.Linear(512, num_classes)
        )

    def forward(self, x):
        """Run ``x`` through the feature extractor, then the classification head."""
        x = self.features(x)
        x = self.classifier(x)
        return x

In [None]:
def preprocess_audio(audio_path):
    """
    Turn an audio file into a normalized log-mel spectrogram tensor.

    Args:
        audio_path: Path of the audio file to load.

    Returns:
        A float32 tensor of shape (1, n_mels, frames): the standardized
        (zero-mean, unit-variance) dB-scaled mel spectrogram with a leading
        channel dimension. A constant spectrogram (e.g. a silent clip) yields
        all zeros rather than NaN/inf.
    """
    # Load audio using librosa, resampled to 48 kHz (BirdNET expects 48kHz)
    audio, sr = librosa.load(audio_path, sr=48000)

    # Mel spectrogram covering 0 Hz up to the Nyquist frequency (24 kHz).
    # NOTE(review): these STFT/mel settings are the notebook's choice; confirm
    # they match the front end of the BirdNET model being fine-tuned.
    spec = librosa.feature.melspectrogram(
        y=audio,
        sr=sr,
        n_fft=2048,
        hop_length=1024,
        n_mels=128,
        fmin=0,
        fmax=24000
    )

    # Convert power to dB relative to the clip's own maximum
    spec = librosa.power_to_db(spec, ref=np.max)

    # Standardize. The std is floored so a constant spectrogram (std == 0)
    # does not cause a division by zero; non-degenerate inputs are unaffected.
    spec = (spec - spec.mean()) / max(spec.std(), 1e-8)

    return torch.tensor(spec, dtype=torch.float32).unsqueeze(0)  # Add channel dimension


In [None]:
def fine_tune_birdnet(num_classes, train_loader):
    """
    Assemble the model, loss and optimizer for fine-tuning BirdNET.

    Args:
        num_classes: Size of the new classification head's output.
        train_loader: Accepted for interface compatibility; not used here.

    Returns:
        Tuple ``(model, criterion, optimizer)``. The optimizer is Adam over the
        parameters that remain trainable after freezing.
    """
    # Wrap the pretrained network with a new classification head.
    pretrained = load_pretrained_birdnet()
    net = BirdNETFineTune(pretrained, num_classes)

    # Keep everything except the last two feature layers fixed.
    for frozen in net.features[:-2].parameters():
        frozen.requires_grad = False

    # Only parameters that are still trainable are handed to the optimizer.
    trainable = [p for p in net.parameters() if p.requires_grad]
    loss_fn = nn.CrossEntropyLoss()
    adam = torch.optim.Adam(trainable, lr=0.0001)

    return net, loss_fn, adam

In [None]:
# Example usage
# Build the fine-tuning objects for this dataset.
num_classes = 121  # number of target species
model, criterion, optimizer = fine_tune_birdnet(
    num_classes=num_classes,
    train_loader=train_loader,
)