In [None]:
import pandas as pd
import numpy as np
import librosa

from sklearn.metrics import confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score
from sklearn.preprocessing import label_binarize

import torch
import torch.nn as nn
from torch.optim import Adam
from torch.nn import CrossEntropyLoss
from torch.utils.data import DataLoader, Dataset, random_split

from skimage.transform import resize

import seaborn as sns
import matplotlib.pyplot as plt

import timm

In [None]:
# Read the dataset's metadata CSV into a DataFrame; later cells use its
# 'species' and 'filename' columns.
df = pd.read_csv("/kaggle/input/bird-song-data-set/bird_songs_metadata.csv")

In [None]:
# Count rows per species to see how balanced the classes are.
species_counts = df['species'].value_counts()
print(species_counts)

In [None]:
# Number of missing values in each metadata column.
df.isna().sum()

In [None]:
# Get the length of one audio clip.
def len_audio(path,sample_rate=16000):
    """Return the duration, in seconds, of one clip from the wavfiles folder.

    Args:
        path: File name relative to the dataset's ``wavfiles/`` directory.
        sample_rate: Accepted for signature compatibility but not used; the
            file is decoded at its native sampling rate (``sr=None``).

    Returns:
        Number of decoded samples divided by the native sampling rate.
    """
    samples, native_sr = librosa.load(
        "/kaggle/input/bird-song-data-set/wavfiles/" + path, sr=None
    )
    return len(samples) / native_sr

In [None]:
# Duration of every clip in seconds; len_audio decodes each file in full to measure it.
df['audio_length'] = df['filename'].apply(len_audio)

In [None]:
# Check whether the clips differ in length by plotting a histogram of their durations.
df['audio_length'].hist()

In [None]:
# Load one audio clip.
def load_audio(path,target_rate = 16000):
    """Decode one clip and resample it to ``target_rate``.

    Args:
        path: File name relative to the dataset's ``wavfiles/`` directory.
        target_rate: Sampling rate, in Hz, of the returned waveform.

    Returns:
        The resampled waveform as a NumPy array.
    """
    file_path = "/kaggle/input/bird-song-data-set/wavfiles/" + path

    # Decode at the file's native rate first, then resample explicitly.
    audio, sr = librosa.load(file_path, sr=None)
    audio = librosa.resample(audio, orig_sr=sr, target_sr=target_rate)

    # librosa already hands back an ndarray, so no extra copy is needed.
    return audio

In [None]:
# Decode every clip and store the resampled waveform in a new column.
# NOTE(review): this keeps every waveform in memory for the rest of the notebook.
df['audio'] = df['filename'].apply(load_audio)

In [None]:
def process_audio_to_mel_log_spectrogram(example, sr=16000, n_mels=128, target_size=(224, 224)):
    """Turn a waveform into a 3-channel, image-like log-mel spectrogram.

    Args:
        example: Waveform samples (anything ``np.asarray`` accepts).
        sr: Sampling rate of ``example`` in Hz.
        n_mels: Number of mel bands.
        target_size: ``(height, width)`` the spectrogram is resized to.

    Returns:
        A float32 array of shape ``(3, height, width)``: the min-max
        normalised spectrogram repeated on three identical channels.
    """
    audio = np.asarray(example)

    # Mel power spectrogram, then decibels relative to the loudest bin.
    mel_spec = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=n_mels)
    log_mel_spec = librosa.power_to_db(mel_spec, ref=np.max)

    # Min-max normalise to [0, 1]. A constant spectrogram (e.g. a silent clip)
    # has zero range; skip the division there instead of producing NaNs.
    log_mel_spec = log_mel_spec - log_mel_spec.min()
    peak = log_mel_spec.max()
    if peak > 0:
        log_mel_spec = log_mel_spec / peak

    # Resize to the spatial size the image models are fed with.
    resized_spec = resize(log_mel_spec, target_size, anti_aliasing=True, mode='reflect')

    # Repeat the single channel three times and pin the dtype to float32 so the
    # result matches the default dtype of the networks' weights.
    return np.stack([resized_spec] * 3, axis=0).astype(np.float32)

In [None]:
# Convert every stored waveform into its log-mel spectrogram using the function's default settings.
df['mel_log'] = df['audio'].apply(process_audio_to_mel_log_spectrogram)

In [None]:
# Model inputs: an array with one spectrogram per metadata row.
song_features = df['mel_log'].values
# Targets: one-hot species matrix as float32 (one column per distinct species).
# The training and evaluation loops convert it back to class indices with argmax.
labels = pd.get_dummies(df['species']).astype(np.float32).values

In [None]:
class birdset(Dataset):
    """Dataset pairing precomputed spectrograms with their one-hot labels."""

    def __init__(self,features,labels):
        """Store the inputs without copying them.

        Args:
            features: Indexable collection with one numeric array per sample.
            labels: Indexable collection with one label vector per sample,
                aligned with ``features``.
        """
        self.features = features
        self.labels = labels

    def __len__(self):
        """Return the number of samples (taken from the labels)."""
        return len(self.labels)

    def __getitem__(self,idx):
        """Return ``(feature, label)`` tensors for sample ``idx``.

        The feature is always converted to float32 so it matches the default
        dtype of the models' weights even when the stored array is float64.
        The label keeps the dtype it was stored with.
        """
        feature = torch.tensor(self.features[idx], dtype=torch.float32)
        label = torch.tensor(self.labels[idx])
        return feature, label

In [None]:
# Wrap the spectrograms and one-hot labels so DataLoader can index them.
dataset = birdset(song_features,labels)

In [None]:
# 80/20 train/validation split; the validation part takes the rounding remainder.
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
# Seed the split so the same samples land in train/validation on every run;
# otherwise the reported metrics are not comparable between runs.
train_dataset, val_dataset = random_split(
    dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(42),
)

In [None]:
# Training hyperparameters shared by the cells below.
batch_size = 64        # samples per mini-batch
epochs = 3             # default number of passes over the training set
learning_rate = 1e-4   # optimizer step size

In [None]:
# Training batches are reshuffled every epoch; validation keeps a fixed order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)

In [None]:
def train_model(model, train_loader, criterion, optimizer, device, epochs=3):
    """Train ``model`` in place and print one summary line per epoch.

    Args:
        model: Network to optimise; it is switched to training mode.
        train_loader: Iterable of ``(inputs, one_hot_labels)`` batches.
        criterion: Loss called as ``criterion(logits, class_indices)``.
        optimizer: Optimizer already bound to ``model``'s parameters.
        device: Device every batch is moved to before the forward pass.
        epochs: Number of passes over ``train_loader``.

    Returns:
        None. The printed loss is the sum of the per-batch losses for the
        epoch (not their mean); the accuracy is over all samples seen.
    """
    model.train()
    for epoch in range(epochs):
        total_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in train_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            # Labels arrive one-hot encoded; the loss expects class indices.
            labels = torch.argmax(labels, dim=1)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            # detach() keeps the bookkeeping out of the autograd graph.
            predicted = torch.argmax(outputs.detach(), dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

        # An empty loader yields no samples; report 0 instead of dividing by zero.
        train_accuracy = correct / total if total else 0.0
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss:.4f}, Accuracy: {train_accuracy:.4f}")

In [None]:
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc
import numpy as np
from sklearn.preprocessing import label_binarize

def evaluate_model(model, val_loader, device, num_classes):
    """Evaluate ``model`` on ``val_loader``, print metrics and plot ROC curves.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        val_loader: Iterable of ``(inputs, one_hot_labels)`` batches.
        device: Device every batch is moved to before the forward pass.
        num_classes: Number of output classes; class ids are
            ``0 .. num_classes - 1``.

    Returns:
        Tuple ``(confusion_matrix, accuracy, precision, recall, f1, roc_auc)``.
        Precision, recall, F1 and ROC AUC are support-weighted averages. The
        confusion matrix always has shape ``(num_classes, num_classes)``.
    """
    model.eval()
    val_correct = 0
    val_total = 0
    val_all_labels = []
    val_all_predictions = []
    val_all_probs = []

    with torch.no_grad():
        for inputs, labels in val_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            # Labels arrive one-hot encoded; metrics need class indices.
            labels = torch.argmax(labels, dim=1)

            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            val_total += labels.size(0)
            val_correct += (predicted == labels).sum().item()

            val_all_labels.extend(labels.cpu().numpy())
            val_all_predictions.extend(predicted.cpu().numpy())
            val_all_probs.extend(torch.softmax(outputs, dim=1).cpu().numpy())

    val_accuracy = val_correct / val_total
    print(f"Validation Accuracy: {val_accuracy:.4f}")

    # Confusion Matrix. Passing the full label set keeps the matrix square with
    # one row/column per class even when a class never shows up in this split.
    class_ids = list(range(num_classes))
    val_cm = confusion_matrix(val_all_labels, val_all_predictions, labels=class_ids)
    print(f"Validation Confusion Matrix:\n{val_cm}")

    # Precision, Recall, F1 Score
    val_precision = precision_score(val_all_labels, val_all_predictions, average='weighted')
    val_recall = recall_score(val_all_labels, val_all_predictions, average='weighted')
    val_f1 = f1_score(val_all_labels, val_all_predictions, average='weighted')

    print(f"Validation Precision: {val_precision:.4f}")
    print(f"Validation Recall: {val_recall:.4f}")
    print(f"Validation F1 Score: {val_f1:.4f}")

    # ROC AUC. Build the one-hot targets by indexing an identity matrix so there
    # is exactly one column per class, including when num_classes == 2
    # (label_binarize collapses the binary case to a single column, which would
    # break the per-class indexing below).
    val_probs = np.array(val_all_probs)
    val_all_labels_bin = np.eye(num_classes, dtype=int)[np.asarray(val_all_labels)]
    val_roc_auc = roc_auc_score(val_all_labels_bin, val_probs, multi_class='ovr', average='weighted')
    print(f"Validation ROC AUC: {val_roc_auc:.4f}")

    # Per-class one-vs-rest ROC curves
    fpr = dict()
    tpr = dict()
    roc_auc = dict()

    for i in class_ids:
        fpr[i], tpr[i], _ = roc_curve(val_all_labels_bin[:, i], val_probs[:, i])
        roc_auc[i] = auc(fpr[i], tpr[i])

    # Plot the ROC curve for each class
    plt.figure()
    colors = ['blue', 'red', 'green', 'orange', 'purple']  # cycled when there are more classes

    for i in class_ids:
        plt.plot(fpr[i], tpr[i], color=colors[i % len(colors)], lw=2, label=f'Class {i} (AUC = {roc_auc[i]:.2f})')

    # Diagonal = chance level.
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('Receiver Operating Characteristic (ROC) Curve')
    plt.legend(loc="lower right")
    plt.show()

    return val_cm, val_accuracy, val_precision, val_recall, val_f1, val_roc_auc

In [None]:
import torch.nn as nn


class ResidualBlock(nn.Module):
    """Basic residual block: two 3x3 conv + batch-norm stages plus a shortcut.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Channels produced by both convolutions.
        stride: Stride of the first convolution.
        downsample: Optional module applied to the input so the shortcut
            matches the main path's shape; ``None`` uses the input as is.
    """

    def __init__(self, in_channels, out_channels, stride=1, downsample=None):
        super().__init__()
        # First stage: (possibly strided) conv -> BN -> ReLU.
        first_conv = nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.conv1 = nn.Sequential(first_conv, nn.BatchNorm2d(out_channels), nn.ReLU())
        # Second stage: conv -> BN; the activation comes after the addition.
        second_conv = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Sequential(second_conv, nn.BatchNorm2d(out_channels))
        self.downsample = downsample
        self.relu = nn.ReLU()
        self.out_channels = out_channels

    def forward(self, x):
        """Return ``relu(conv2(conv1(x)) + shortcut(x))``."""
        main_path = self.conv2(self.conv1(x))
        # Truthiness check: the shortcut is projected only when a non-empty
        # downsample module was supplied.
        shortcut = self.downsample(x) if self.downsample else x
        return self.relu(main_path + shortcut)


class ResNet(nn.Module):
    """ResNet-style classifier built from basic residual blocks.

    Args:
        layers: Sequence of four ints: blocks in each of the four stages.
        out_neurons: Size of the final linear layer (number of logits).
        block: Block class; called as
            ``block(in_channels, out_channels, stride, downsample)``.
    """

    def __init__(
        self,
        layers,
        out_neurons=1,
        block=ResidualBlock,
    ):
        super(ResNet, self).__init__()
        self.inplanes = 64
        # Stem: 7x7 stride-2 conv on 3-channel input, then 3x3 stride-2 max-pool.
        self.conv1 = nn.Sequential(
            nn.Conv2d(3, 64, kernel_size=7, stride=2, padding=3),
            nn.BatchNorm2d(64),
            nn.ReLU(),
        )
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.layer0 = self._make_layer(block, 64, layers[0], stride=1)
        self.layer1 = self._make_layer(block, 128, layers[1], stride=2)
        self.layer2 = self._make_layer(block, 256, layers[2], stride=2)
        self.layer3 = self._make_layer(block, 512, layers[3], stride=2)
        # Global average pooling. For 224x224 inputs the last feature map is
        # 7x7, so this equals the fixed 7x7 average pool, but it also works for
        # other input sizes instead of breaking the 512-wide linear layer.
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        self.fc = nn.Linear(512, out_neurons)

    def _make_layer(self, block, planes, blocks, stride=1):
        """Build one stage of ``blocks`` residual blocks with ``planes`` channels.

        Only the first block may change resolution or width; it gets a 1x1
        conv + batch-norm projection on its shortcut whenever the stride is
        not 1 or the channel count changes.
        """
        downsample = None
        if stride != 1 or self.inplanes != planes:
            downsample = nn.Sequential(
                nn.Conv2d(self.inplanes, planes, kernel_size=1, stride=stride),
                nn.BatchNorm2d(planes),
            )

        layers = []
        layers.append(block(self.inplanes, planes, stride, downsample))

        # Every following block (and the next stage) starts from `planes` channels.
        self.inplanes = planes
        for _ in range(1, blocks):
            layers.append(block(self.inplanes, planes))

        return nn.Sequential(*layers)

    def forward(self, x):
        """Return logits of shape ``(batch, out_neurons)``."""
        x = self.conv1(x)
        x = self.maxpool(x)

        x = self.layer0(x)
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)

        x = self.avgpool(x)
        # Flatten (batch, 512, 1, 1) to (batch, 512) for the linear head.
        x = x.view(x.size(0), -1)
        x = self.fc(x)

        return x

In [None]:
class BottleneckBlock(nn.Module):
    """Bottleneck residual block: 1x1 reduce, 3x3, then 1x1 expand.

    The block outputs ``out_channels * expansion`` channels.

    Args:
        in_channels: Channels of the incoming feature map.
        out_channels: Width of the two inner convolutions.
        identity_downsample: Optional module applied to the input so the
            shortcut matches the main path's shape; ``None`` uses the input.
        stride: Stride of the 3x3 convolution.
    """

    expansion = 4

    def __init__(
        self,
        in_channels: int,
        out_channels: int=3,
        identity_downsample=None,
        stride: int = 1,
    ):
        super().__init__()
        self.identity_downsample = identity_downsample
        widened = out_channels * self.expansion

        # 1x1 reduce
        self.conv1 = nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=1, padding=0)
        self.bn1 = nn.BatchNorm2d(out_channels)
        # 3x3 (carries the stride)
        self.conv2 = nn.Conv2d(out_channels, out_channels, kernel_size=3, stride=stride, padding=1)
        self.bn2 = nn.BatchNorm2d(out_channels)
        # 1x1 expand
        self.conv3 = nn.Conv2d(out_channels, widened, kernel_size=1, stride=1, padding=0)
        self.bn3 = nn.BatchNorm2d(widened)

        self.relu = nn.ReLU()

    def forward(self, x):
        """Return ``relu(main_path(x) + shortcut(x))``."""
        out = self.relu(self.bn1(self.conv1(x)))
        out = self.relu(self.bn2(self.conv2(out)))
        out = self.bn3(self.conv3(out))

        shortcut = x if self.identity_downsample is None else self.identity_downsample(x)
        return self.relu(out + shortcut)


class DeepResNet(nn.Module):
    """ResNet-style classifier built from bottleneck blocks.

    Args:
        layers: Sequence of four ints: blocks in each of the four stages.
        out_neurons: Size of the final linear layer (number of logits).
        image_channels: Channels of the input image.
        block: Block class exposing an ``expansion`` attribute; called as
            ``block(in_channels, out_channels, identity_downsample=..., stride=...)``.
    """

    def __init__(
        self, layers: list, out_neurons: int, image_channels: int=3, block=BottleneckBlock
    ):
        super(DeepResNet, self).__init__()
        self.in_channels = 64
        # Stem: 7x7 stride-2 conv, batch-norm, ReLU, 3x3 stride-2 max-pool.
        self.conv1 = nn.Conv2d(image_channels, 64, kernel_size=7, stride=2, padding=3)
        self.bn1 = nn.BatchNorm2d(64)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)

        self.layer1 = self._make_layer(block, layers[0], out_channels=64, stride=1)
        self.layer2 = self._make_layer(block, layers[1], out_channels=128, stride=2)
        self.layer3 = self._make_layer(block, layers[2], out_channels=256, stride=2)
        self.layer4 = self._make_layer(block, layers[3], out_channels=512, stride=2)

        self.avgpool = nn.AdaptiveAvgPool2d(output_size=(1, 1))
        self.flatten = nn.Flatten()
        # The last stage emits 512 * expansion channels; read the factor from
        # the block so a block with a different expansion still fits the head.
        self.ffn = nn.Linear(512 * block.expansion, out_neurons)

    def forward(self, x):
        """Return logits of shape ``(batch, out_neurons)``."""
        x = self.conv1(x)
        x = self.bn1(x)
        x = self.relu(x)
        x = self.maxpool(x)

        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)

        x = self.avgpool(x)
        x = self.flatten(x)
        return self.ffn(x)

    def _make_layer(
        self,
        block,
        num_residual_blocks: int,
        out_channels: int,
        stride: int = 1,
    ):
        """Build one stage of ``num_residual_blocks`` blocks.

        Only the first block may change resolution or width; it gets a 1x1
        conv + batch-norm projection on its shortcut whenever the stride is
        not 1 or the incoming width differs from ``out_channels * expansion``.
        """
        identity_downsample = None
        layers = []

        if stride != 1 or self.in_channels != out_channels * block.expansion:
            identity_downsample = nn.Sequential(
                nn.Conv2d(
                    self.in_channels,
                    out_channels * block.expansion,
                    kernel_size=1,
                    stride=stride,
                ),
                nn.BatchNorm2d(out_channels * block.expansion),
            )

        layers.append(
            block(
                self.in_channels,
                out_channels,
                identity_downsample=identity_downsample,
                stride=stride,
            )
        )

        # Every following block (and the next stage) receives the expanded width.
        self.in_channels = out_channels * block.expansion

        for _ in range(num_residual_blocks - 1):
            layers.append(block(self.in_channels, out_channels))

        return nn.Sequential(*layers)

In [None]:
def load_model(model_name, num_classes, pretrained=True):
    """Build one of the supported classifiers with a ``num_classes``-wide head.

    Args:
        model_name: One of ``'densenet121'``, ``'xception'``, ``'resnet-18'``
            or ``'resnet-50'``.
        num_classes: Number of output logits.
        pretrained: Forwarded to the DenseNet and Xception loaders; the two
            ResNet variants defined in this notebook are always built from
            scratch.

    Returns:
        The constructed ``nn.Module``.

    Raises:
        ValueError: If ``model_name`` is not one of the supported names.
    """
    if model_name == 'densenet121':
        model = torch.hub.load('pytorch/vision:v0.10.0', 'densenet121', pretrained=pretrained)
        # Swap the classification head, reading its input width from the model.
        model.classifier = nn.Linear(
            in_features=model.classifier.in_features, out_features=num_classes, bias=True
        )
    elif model_name == 'xception':
        model = timm.create_model('xception', pretrained=pretrained)
        model.fc = nn.Linear(in_features=model.fc.in_features, out_features=num_classes)
    elif model_name == 'resnet-18':
        model = ResNet(layers=[2, 2, 2, 2], out_neurons=num_classes)
    elif model_name == "resnet-50":
        model = DeepResNet(layers=[3, 4, 6, 3], out_neurons=num_classes, block=BottleneckBlock)
    else:
        # Fail with a clear message instead of an UnboundLocalError at `return`.
        raise ValueError(
            f"Unknown model_name {model_name!r}; expected one of "
            "'densenet121', 'xception', 'resnet-18', 'resnet-50'"
        )
    return model

In [None]:
def compare_models(train_loader, val_loader, device, num_classes, res_epochs=10, tuning_epochs=3, lr=0.001):
    """Train and evaluate every supported architecture on the same data.

    Args:
        train_loader: Batches used for training each model.
        val_loader: Batches used for evaluating each model.
        device: Device the models are trained and evaluated on.
        num_classes: Number of target classes.
        res_epochs: Epochs for the two ResNet variants.
        tuning_epochs: Epochs for DenseNet-121 and Xception.
        lr: Adam learning rate used for every model.

    Returns:
        Dict mapping model name to a dict with the keys ``'Confusion Matrix'``,
        ``'Accuracy'``, ``'Precision'``, ``'Recall'``, ``'F1 Score'`` and
        ``'ROC AUC'``.
    """
    schedule = [
        ('densenet121', tuning_epochs),
        ('xception', tuning_epochs),
        ('resnet-18', res_epochs),
        ('resnet-50', res_epochs),
    ]
    results = {}

    for model_name, epochs in schedule:
        print(f"\nTraining and Evaluating {model_name} for {epochs} epochs...")
        # Load and initialize the model
        model = load_model(model_name, num_classes)
        model.to(device)

        # Define the criterion and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=lr)

        # Train the model
        train_model(model, train_loader, criterion, optimizer, device, epochs)

        # Evaluate the model
        cm, accuracy, precision, recall, f1, roc_auc = evaluate_model(model, val_loader, device, num_classes)

        results[model_name] = {
            'Confusion Matrix': cm,
            'Accuracy': accuracy,
            'Precision': precision,
            'Recall': recall,
            'F1 Score': f1,
            'ROC AUC': roc_auc
        }

        # Drop this model and its optimizer state before the next one is built,
        # so two networks are not kept alive on the device at the same time.
        del model, optimizer
        if torch.cuda.is_available():
            torch.cuda.empty_cache()

    return results

In [None]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
# Take the class count from the width of the one-hot label matrix so the model
# heads always match the labels actually present in the metadata.
num_classes = labels.shape[1]
results = compare_models(train_loader, val_loader, device, num_classes=num_classes, res_epochs=10, tuning_epochs=3)

# Print results
for model_name, metrics in results.items():
    print(f"\nResults for {model_name}:")
    for metric, value in metrics.items():
        # The confusion matrix is plotted separately; only scalars are printed here.
        if metric == 'Confusion Matrix':
            continue
        # If value is a NumPy array, reduce it to a single number before formatting
        if isinstance(value, np.ndarray):
            if value.size == 1:
                value = value.item()  # single-element array -> Python scalar
            else:
                value = np.mean(value)  # multi-element array -> its mean
        print(f"{metric}: {value:.4f}")

In [None]:
# One confusion-matrix heatmap per trained model.
for model_name, metrics in results.items():
    # Only the confusion matrix is needed here. The scalar metrics are not
    # unpacked: binding one of them to the name `f1_score` would shadow the
    # scikit-learn function of the same name for the rest of the session and
    # break any later call to evaluate_model.
    cm = metrics['Confusion Matrix']

    # Plot Confusion Matrix
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title(f'Confusion Matrix - {model_name}')
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.show()