load

In [3]:
import json

# Load JSON data
with open("filtered_data.json", "r") as f:
    bird_data = json.load(f)


import os

# ✅ Extract Bird Species Labels
labels = list(bird_data.keys())  # List all bird species
label_mapping = {species: idx for idx, species in enumerate(labels)}  # Assign numeric labels
reverse_label_mapping = {idx: species for species, idx in label_mapping.items()}  # Label → Bird

print(f"✅ Bird Species: {labels}")
print(f"✅ Label Mapping: {label_mapping}")

import os

data_list = []  # Stores (file_path, label) pairs
base_path = "wavfiles"  # Base directory for WAV files

# ✅ Collect File Paths & Labels
for bird_name, bird_list in bird_data.items():
    label = label_mapping[bird_name]
    for bird in bird_list:
        file_path = os.path.join(base_path, bird["filename"])

        # ✅ Check if file exists before adding
        if os.path.exists(file_path):
            data_list.append((file_path, label))
        else:
            print(f"⚠️ File not found: {file_path}")  # Debug missing files

print(f"✅ Total Files Processed: {len(data_list)}")
print(data_list)

✅ Bird Species: ['American Robin', "Bewick's Wren", 'Northern Cardinal', 'Northern Mockingbird', 'Song Sparrow']
✅ Label Mapping: {'American Robin': 0, "Bewick's Wren": 1, 'Northern Cardinal': 2, 'Northern Mockingbird': 3, 'Song Sparrow': 4}
✅ Total Files Processed: 500
[('wavfiles\\562221-1.wav', 0), ('wavfiles\\564324-5.wav', 0), ('wavfiles\\129798-0.wav', 0), ('wavfiles\\423449-0.wav', 0), ('wavfiles\\446458-1.wav', 0), ('wavfiles\\559314-7.wav', 0), ('wavfiles\\165292-8.wav', 0), ('wavfiles\\14442-13.wav', 0), ('wavfiles\\423449-2.wav', 0), ('wavfiles\\129798-13.wav', 0), ('wavfiles\\446458-0.wav', 0), ('wavfiles\\165272-7.wav', 0), ('wavfiles\\103060-14.wav', 0), ('wavfiles\\175222-10.wav', 0), ('wavfiles\\483578-15.wav', 0), ('wavfiles\\322887-8.wav', 0), ('wavfiles\\34766-11.wav', 0), ('wavfiles\\322822-8.wav', 0), ('wavfiles\\562449-6.wav', 0), ('wavfiles\\368615-10.wav', 0), ('wavfiles\\464766-9.wav', 0), ('wavfiles\\138063-8.wav', 0), ('wavfiles\\138063-7.wav', 0), ('wavfiles

transform

In [None]:
import torch
import torchaudio.transforms as T
import torchvision.transforms as transforms

# ✅ Define Transformations for Training
class MelTransform:
    def __init__(self, target_size=(128, 128)):
        self.resize = transforms.Resize(target_size)  # ✅ Resize to smaller shape

    def __call__(self, mel_spec):
        # ✅ Resize Spectrogram to Target Size
        mel_spec = self.resize(mel_spec.unsqueeze(0))  # Add channel dim for resizing
        mel_spec = mel_spec.squeeze(0)  # Remove extra dimension after resizing
        return mel_spec


data loader

In [10]:
import torch
import librosa
from torch.utils.data import Dataset
import numpy as np

class BirdSoundDataset(Dataset):
    def __init__(self, data_list):
        self.data_list = data_list
        self.transform = MelTransform()  


    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, idx):
        file_path, label = self.data_list[idx]
        y, sr = librosa.load(file_path, sr=None)  # Load with original sample rate

        # ✅ Compute Mel Spectrogram
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        # ✅ Convert to PyTorch Tensor
        mel_tensor = torch.tensor(mel_spec_db, dtype=torch.float32).unsqueeze(0)  # ✅ Add channel dimension (1, 64, 64)
        label_tensor = torch.tensor(label, dtype=torch.long)
        
        #transform
        mel_tensor=self.transform(mel_tensor)
        

        return mel_tensor,sr, label_tensor,file_path

# ✅ Initialize Dataset
dataset = BirdSoundDataset(data_list)
print(f"✅ Dataset Initialized: {len(dataset)} samples")

from torch.utils.data import DataLoader

# ✅ Create DataLoader
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

from sklearn.model_selection import train_test_split

# ✅ Split Data: 80% Train, 10% Validation, 10% Test
train_files, temp_files = train_test_split(
    dataset.data_list, test_size=0.2, random_state=42, stratify=[label for _, label in dataset.data_list]
)

val_files, test_files = train_test_split(
    temp_files, test_size=0.5, random_state=42, stratify=[label for _, label in temp_files]
)

print(f"✅ Training Samples: {len(train_files)}")
print(f"✅ Validation Samples: {len(val_files)}")
print(f"✅ Testing Samples: {len(test_files)}")

# ✅ Define Batch Size
batch_size = 16  # Increased for more stable training

# ✅ Create Train, Validation, and Test Datasets
train_dataset = BirdSoundDataset(train_files)
val_dataset = BirdSoundDataset(val_files)
test_dataset = BirdSoundDataset(test_files)

# ✅ Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"✅ Train Batches: {len(train_loader)}, Val Batches: {len(val_loader)}, Test Batches: {len(test_loader)}")

# ✅ Fetch One Batch from Train Loader
train_batch = next(iter(train_loader))
mel_specs, sample_rates, labels, file_paths = train_batch

print(f"✅ Train Batch Shape: {mel_specs.shape}")  # Expected: (batch_size, 1, 128, 128)

✅ Dataset Initialized: 500 samples
✅ Training Samples: 400
✅ Validation Samples: 50
✅ Testing Samples: 50
✅ Train Batches: 25, Val Batches: 4, Test Batches: 4
✅ Train Batch Shape: torch.Size([16, 1, 128, 128])


CNN

In [11]:
from cnn import ImprovedBirdSoundCNN

import torch.nn as nn


import torch.optim as optim

# ✅ Device Configuration
device = "cuda" if torch.cuda.is_available() else "cpu"

# ✅ Initialize Model
num_classes = len(label_mapping)  # Number of bird species
model = ImprovedBirdSoundCNN(num_classes).to(device)

# ✅ Loss Function & Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)



TRAIN

In [12]:
import torch
import torch.optim as optim

num_epochs = 30  # Increased to ensure convergence
best_val_loss = float("inf")
patience = 7  # Early stopping patience (how many epochs to wait before stopping)
counter = 0  # Early stopping counter

# 🔹 Learning Rate Scheduler
scheduler = optim.lr_scheduler.ReduceLROnPlateau(optimizer, mode="min", factor=0.5, patience=3, verbose=True)

for epoch in range(num_epochs):
    model.train()  # Set model to training mode
    running_loss = 0.0
    
    for mel_specs, sample_rates, labels, _ in train_loader:
        mel_specs, labels = mel_specs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(mel_specs)
        loss = criterion(outputs, labels)
        loss.backward()
        
        # 🔹 Prevent exploding gradients
        torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=5.0)
        
        optimizer.step()
        running_loss += loss.item()
    
    avg_train_loss = running_loss / len(train_loader)

    # 🔹 Validation Step
    model.eval()  # Set model to evaluation mode
    val_loss = 0.0
    correct, total = 0, 0

    with torch.no_grad():
        for mel_specs, sample_rates, labels, _ in val_loader:
            mel_specs, labels = mel_specs.to(device), labels.to(device)
            outputs = model(mel_specs)
            loss = criterion(outputs, labels)
            val_loss += loss.item()

            # 🔹 Compute Accuracy
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    avg_val_loss = val_loss / len(val_loader)
    val_acc = 100 * correct / total  # Convert to percentage

    # 🔹 Print Progress
    print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {avg_train_loss:.4f}, Val Loss: {avg_val_loss:.4f}, Val Acc: {val_acc:.2f}%")

    # 🔹 Save Best Model (Based on Validation Loss)
    if avg_val_loss < best_val_loss:
        best_val_loss = avg_val_loss
        torch.save(model.state_dict(), "best_model.pth")
        print("✅ Best Model Saved!")
        counter = 0  # Reset early stopping counter
    else:
        counter += 1  # Increase counter if no improvement

    # 🔹 Adjust Learning Rate
    scheduler.step(avg_val_loss)

    # 🔹 Early Stopping Condition
    if counter >= patience:
        print("🔥 Early Stopping Triggered! Stopping Training.")
        break

print("🚀 Training Complete!")




Epoch [1/30] - Train Loss: 3.9141, Val Loss: 2.6421, Val Acc: 38.00%
✅ Best Model Saved!
Epoch [2/30] - Train Loss: 1.6361, Val Loss: 1.2266, Val Acc: 40.00%
✅ Best Model Saved!
Epoch [3/30] - Train Loss: 1.0782, Val Loss: 0.8739, Val Acc: 52.00%
✅ Best Model Saved!
Epoch [4/30] - Train Loss: 0.9055, Val Loss: 0.8040, Val Acc: 52.00%
✅ Best Model Saved!
Epoch [5/30] - Train Loss: 0.7966, Val Loss: 0.7188, Val Acc: 56.00%
✅ Best Model Saved!
Epoch [6/30] - Train Loss: 0.7107, Val Loss: 0.8397, Val Acc: 68.00%
Epoch [7/30] - Train Loss: 0.7283, Val Loss: 0.7074, Val Acc: 68.00%
✅ Best Model Saved!
Epoch [8/30] - Train Loss: 0.6407, Val Loss: 0.7506, Val Acc: 60.00%
Epoch [9/30] - Train Loss: 0.6600, Val Loss: 0.7039, Val Acc: 60.00%
✅ Best Model Saved!
Epoch [10/30] - Train Loss: 0.5338, Val Loss: 0.6889, Val Acc: 66.00%
✅ Best Model Saved!
Epoch [11/30] - Train Loss: 0.5694, Val Loss: 0.9448, Val Acc: 66.00%
Epoch [12/30] - Train Loss: 0.5688, Val Loss: 1.0580, Val Acc: 52.00%
Epoch [13

TEST

In [14]:
# ✅ Load Best Model
model.load_state_dict(torch.load("best_model.pth"))
model.eval()

# ✅ Initialize Tracking Variables
correct_per_class = {class_name: 0 for class_name in reverse_label_mapping.values()}
total_per_class = {class_name: 0 for class_name in reverse_label_mapping.values()}
incorrect_predictions = []

test_loss = 0.0
correct = 0
total = 0

with torch.no_grad():
    for mel_specs, sample_rates, labels, file_paths in test_loader:
        mel_specs, labels = mel_specs.to(device), labels.to(device)
        outputs = model(mel_specs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()

        # ✅ Get Predictions
        _, predicted = torch.max(outputs, 1)

        # ✅ Update Counters
        for i in range(len(labels)):
            true_label = labels[i].item()
            pred_label = predicted[i].item()
            true_class = reverse_label_mapping[true_label]
            pred_class = reverse_label_mapping[pred_label]

            total_per_class[true_class] += 1  # Track total samples per class

            if pred_label == true_label:
                correct_per_class[true_class] += 1
            else:
                incorrect_predictions.append((file_paths[i], true_class, pred_class))

        correct += (predicted == labels).sum().item()
        total += labels.size(0)

# ✅ Print Final Test Accuracy
test_acc = correct / total
print(f"🔥 Final Test Loss: {test_loss / len(test_loader):.4f}")
print(f"🎯 Final Test Accuracy: {test_acc:.2%}")

# ✅ Print Accuracy Per Class
print("\n✅ Class-wise Accuracy:")
for class_name in correct_per_class:
    if total_per_class[class_name] > 0:  # Avoid division by zero
        accuracy = correct_per_class[class_name] / total_per_class[class_name] * 100
        print(f"  🏷️ {class_name}: {accuracy:.2f}% ({correct_per_class[class_name]}/{total_per_class[class_name]})")


🔥 Final Test Loss: 0.5878
🎯 Final Test Accuracy: 74.00%

✅ Class-wise Accuracy:
  🏷️ American Robin: 40.00% (4/10)
  🏷️ Bewick's Wren: 90.00% (9/10)
  🏷️ Northern Cardinal: 90.00% (9/10)
  🏷️ Northern Mockingbird: 80.00% (8/10)
  🏷️ Song Sparrow: 70.00% (7/10)
