In [35]:
import json

# Load JSON data
with open("filtered_data.json", "r") as f:
    bird_data = json.load(f)


import os

# ✅ Extract Bird Species Labels
labels = list(bird_data.keys())  # List all bird species
label_mapping = {species: idx for idx, species in enumerate(labels)}  # Assign numeric labels
reverse_label_mapping = {idx: species for species, idx in label_mapping.items()}  # Label → Bird

print(f"✅ Bird Species: {labels}")
print(f"✅ Label Mapping: {label_mapping}")

import os

data_list = []  # Stores (file_path, label) pairs
base_path = "wavfiles"  # Base directory for WAV files

# ✅ Collect File Paths & Labels
for bird_name, bird_list in bird_data.items():
    label = label_mapping[bird_name]
    for bird in bird_list:
        file_path = os.path.join(base_path, bird["filename"])

        # ✅ Check if file exists before adding
        if os.path.exists(file_path):
            data_list.append((file_path, label))
        else:
            print(f"⚠️ File not found: {file_path}")  # Debug missing files

print(f"✅ Total Files Processed: {len(data_list)}")
print(data_list)

✅ Bird Species: ['American Robin', "Bewick's Wren", 'Northern Cardinal', 'Northern Mockingbird', 'Song Sparrow']
✅ Label Mapping: {'American Robin': 0, "Bewick's Wren": 1, 'Northern Cardinal': 2, 'Northern Mockingbird': 3, 'Song Sparrow': 4}
✅ Total Files Processed: 500
[('wavfiles\\562221-1.wav', 0), ('wavfiles\\564324-5.wav', 0), ('wavfiles\\129798-0.wav', 0), ('wavfiles\\423449-0.wav', 0), ('wavfiles\\446458-1.wav', 0), ('wavfiles\\559314-7.wav', 0), ('wavfiles\\165292-8.wav', 0), ('wavfiles\\14442-13.wav', 0), ('wavfiles\\423449-2.wav', 0), ('wavfiles\\129798-13.wav', 0), ('wavfiles\\446458-0.wav', 0), ('wavfiles\\165272-7.wav', 0), ('wavfiles\\103060-14.wav', 0), ('wavfiles\\175222-10.wav', 0), ('wavfiles\\483578-15.wav', 0), ('wavfiles\\322887-8.wav', 0), ('wavfiles\\34766-11.wav', 0), ('wavfiles\\322822-8.wav', 0), ('wavfiles\\562449-6.wav', 0), ('wavfiles\\368615-10.wav', 0), ('wavfiles\\464766-9.wav', 0), ('wavfiles\\138063-8.wav', 0), ('wavfiles\\138063-7.wav', 0), ('wavfiles

In [36]:
import torch
import torchaudio.transforms as T
import torchvision.transforms as transforms

# ✅ Define Transformations for Training
class MelTransform:
    def __init__(self, target_size=(64, 64)):
        self.resize = transforms.Resize(target_size)  # ✅ Resize to smaller shape

    def __call__(self, mel_spec):
        # ✅ Resize Spectrogram to Target Size
        mel_spec = self.resize(mel_spec.unsqueeze(0))  # Add channel dim for resizing
        mel_spec = mel_spec.squeeze(0)  # Remove extra dimension after resizing
        return mel_spec


In [37]:
import torch
import librosa
from torch.utils.data import Dataset
import numpy as np

class BirdSoundDataset(Dataset):
    def __init__(self, data_list):
        self.data_list = data_list
        self.transform = MelTransform(target_size=(64, 64))  # ✅ Default transform


    def __len__(self):
        return len(self.data_list)

    def __getitem__(self, idx):
        file_path, label = self.data_list[idx]
        y, sr = librosa.load(file_path, sr=None)  # Load with original sample rate

        # ✅ Compute Mel Spectrogram
        mel_spec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        mel_spec_db = librosa.power_to_db(mel_spec, ref=np.max)

        # ✅ Convert to PyTorch Tensor
        mel_tensor = torch.tensor(mel_spec_db, dtype=torch.float32).unsqueeze(0)  # ✅ Add channel dimension (1, 64, 64)
        label_tensor = torch.tensor(label, dtype=torch.long)
        
        #transform
        mel_tensor=self.transform(mel_tensor)
        

        return mel_tensor,sr, label_tensor,file_path

# ✅ Initialize Dataset
dataset = BirdSoundDataset(data_list)
print(f"✅ Dataset Initialized: {len(dataset)} samples")

from torch.utils.data import DataLoader

# ✅ Create DataLoader
dataloader = DataLoader(dataset, batch_size=4, shuffle=True)

from sklearn.model_selection import train_test_split

# ✅ Split Data: 80% Train, 20% Test
train_files, test_files = train_test_split(dataset.data_list, test_size=0.2, random_state=42, stratify=[label for _, label in dataset.data_list])

print(f"✅ Training Samples: {len(train_files)}")
print(f"✅ Testing Samples: {len(test_files)}")

# ✅ Define Batch Size
batch_size = 16  # Increased for more stable training

# ✅ Create Train and Test Datasets
train_dataset = BirdSoundDataset(train_files)
test_dataset = BirdSoundDataset(test_files)

# ✅ Create DataLoaders
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

print(f"✅ Train Batches: {len(train_loader)}, Test Batches: {len(test_loader)}")

# ✅ Fetch One Batch from Train Loader
train_batch = next(iter(train_loader))
mel_specs, sample_rates, labels, file_paths = train_batch

print(f"✅ Train Batch Shape: {mel_specs.shape}")  # Should be (batch_size, 1, 64, 64)


✅ Dataset Initialized: 500 samples
✅ Training Samples: 400
✅ Testing Samples: 100
✅ Train Batches: 25, Test Batches: 7
✅ Train Batch Shape: torch.Size([16, 1, 64, 64])


In [38]:
import torch
import torch.nn as nn
import torch.nn.functional as F

# ✅ Define CNN Model for Mel Spectrograms
class ImprovedBirdSoundCNN(nn.Module):
    def __init__(self, num_classes):
        super(ImprovedBirdSoundCNN, self).__init__()
        
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(2, 2)  # (64x64 → 32x32)
        
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.bn2 = nn.BatchNorm2d(64)
        self.pool2 = nn.MaxPool2d(2, 2)  # (32x32 → 16x16)

        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, stride=1, padding=1)
        self.bn3 = nn.BatchNorm2d(128)
        self.pool3 = nn.MaxPool2d(2, 2)  # (16x16 → 8x8)

        self.fc1 = nn.Linear(128 * 8 * 8, 256)
        self.dropout = nn.Dropout(0.3)  # Reduce overfitting
        self.fc2 = nn.Linear(256, num_classes)
        
    def forward(self, x):
        x = self.pool(F.relu(self.bn1(self.conv1(x))))  
        x = self.pool2(F.relu(self.bn2(self.conv2(x))))  
        x = self.pool3(F.relu(self.bn3(self.conv3(x))))  
        
        x = x.view(x.shape[0], -1)  # Flatten the tensor
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)  
        return x


import torch.optim as optim

# ✅ Device Configuration
device = "cuda" if torch.cuda.is_available() else "cpu"

# ✅ Initialize Model
num_classes = len(label_mapping)  # Number of bird species
model = ImprovedBirdSoundCNN(num_classes).to(device)

# ✅ Loss Function & Optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)



In [39]:
num_epochs = 10
best_test_loss = float("inf")

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    
    for mel_specs, sample_rates, labels, _ in train_loader:
        mel_specs, labels = mel_specs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(mel_specs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    
    avg_train_loss = running_loss / len(train_loader)

    # ✅ Print Progress
    print(f"Epoch [{epoch+1}/{num_epochs}] - Train Loss: {avg_train_loss:.4f}")

    # ✅ Save Best Model
    if avg_train_loss < best_test_loss:
        best_test_loss = avg_train_loss
        torch.save(model.state_dict(), "best_model.pth")
        print("✅ Best Model Saved!")

print("🔥 Training Complete!")


Epoch [1/10] - Train Loss: 2.7680
✅ Best Model Saved!
Epoch [2/10] - Train Loss: 1.2298
✅ Best Model Saved!
Epoch [3/10] - Train Loss: 1.0038
✅ Best Model Saved!
Epoch [4/10] - Train Loss: 0.8582
✅ Best Model Saved!
Epoch [5/10] - Train Loss: 0.8024
✅ Best Model Saved!
Epoch [6/10] - Train Loss: 0.6409
✅ Best Model Saved!
Epoch [7/10] - Train Loss: 0.6228
✅ Best Model Saved!
Epoch [8/10] - Train Loss: 0.5731
✅ Best Model Saved!
Epoch [9/10] - Train Loss: 0.4636
✅ Best Model Saved!
Epoch [10/10] - Train Loss: 0.4199
✅ Best Model Saved!
🔥 Training Complete!


In [43]:
# ✅ Load Best Model
model.load_state_dict(torch.load("x.pth"))
model.eval()

# ✅ Initialize Tracking Variables
correct_per_class = {class_name: 0 for class_name in reverse_label_mapping.values()}
total_per_class = {class_name: 0 for class_name in reverse_label_mapping.values()}
incorrect_predictions = []

test_loss = 0.0
correct = 0
total = 0

with torch.no_grad():
    for mel_specs, sample_rates, labels, file_paths in test_loader:
        mel_specs, labels = mel_specs.to(device), labels.to(device)
        outputs = model(mel_specs)
        loss = criterion(outputs, labels)
        test_loss += loss.item()

        # ✅ Get Predictions
        _, predicted = torch.max(outputs, 1)

        # ✅ Update Counters
        for i in range(len(labels)):
            true_label = labels[i].item()
            pred_label = predicted[i].item()
            true_class = reverse_label_mapping[true_label]
            pred_class = reverse_label_mapping[pred_label]

            total_per_class[true_class] += 1  # Track total samples per class

            if pred_label == true_label:
                correct_per_class[true_class] += 1
            else:
                incorrect_predictions.append((file_paths[i], true_class, pred_class))

        correct += (predicted == labels).sum().item()
        total += labels.size(0)

# ✅ Print Final Test Accuracy
test_acc = correct / total
print(f"🔥 Final Test Loss: {test_loss / len(test_loader):.4f}")
print(f"🎯 Final Test Accuracy: {test_acc:.2%}")

# ✅ Print Accuracy Per Class
print("\n✅ Class-wise Accuracy:")
for class_name in correct_per_class:
    if total_per_class[class_name] > 0:  # Avoid division by zero
        accuracy = correct_per_class[class_name] / total_per_class[class_name] * 100
        print(f"  🏷️ {class_name}: {accuracy:.2f}% ({correct_per_class[class_name]}/{total_per_class[class_name]})")


🔥 Final Test Loss: 1.0331
🎯 Final Test Accuracy: 71.00%

✅ Class-wise Accuracy:
  🏷️ American Robin: 45.00% (9/20)
  🏷️ Bewick's Wren: 85.00% (17/20)
  🏷️ Northern Cardinal: 95.00% (19/20)
  🏷️ Northern Mockingbird: 75.00% (15/20)
  🏷️ Song Sparrow: 55.00% (11/20)
