In [25]:
import torch
from torch.utils.data import Dataset
import os
from scipy.io import wavfile

class BirdClassDataset(Dataset):
    def __init__(self, root_dir, transform=None):
        self.root_dir = root_dir
        self.transform = transform
        self.classes = os.listdir(root_dir)
        self.data = []
        for i, bird_class in enumerate(self.classes):
            class_path = os.path.join(root_dir, bird_class)
            for file in os.listdir(class_path):
                self.data.append((os.path.join(class_path, file), i))  # (file_path, class_index)

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        file_path, class_idx = self.data[idx]
        sample_rate, data = wavfile.read(file_path)
        sample = {'audio': data, 'class_idx': class_idx}
        if self.transform:
            sample = self.transform(sample)
        return sample


In [26]:
import torch.nn as nn

class BirdCNN(nn.Module):
    def __init__(self, num_classes):
        super(BirdCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(128 * 7 * 7, 512)
        self.fc2 = nn.Linear(512, num_classes)

    def forward(self, x):
        x = self.pool(nn.functional.relu(self.conv1(x)))
        x = self.pool(nn.functional.relu(self.conv2(x)))
        x = self.pool(nn.functional.relu(self.conv3(x)))
        x = x.view(-1, 128 * 7 * 7)
        x = nn.functional.relu(self.fc1(x))
        x = self.fc2(x)
        return x


In [31]:
import os
import torch
import torchaudio
import pandas as pd
from torch.utils.data import Dataset, DataLoader
import torch.nn as nn

# Définissez votre configuration
ANNOTATIONS_FILE = '/kaggle/input/birdclef-2023/train_metadata.csv'
AUDIO_DIR = '/kaggle/input/birdclef-2023/train_audio'
FOLDER_FILTERS = ['abethr1', 'abhori1', 'abythr1', 'afbfly1', 'afdfly1']
SAMPLE_RATE = 16000
NUM_SAMPLES = 16000
BATCH_SIZE = 32
EPOCHS = 20
LEARNING_RATE = 0.001
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Définir le dataset
class BirdClassDataset(Dataset):
    def __init__(self, annotations_file, audio_dir, transformation, target_sample_rate, num_samples, device, folder_filters=None):
        self.annotations = pd.read_csv(annotations_file)
        self.audio_dir = audio_dir
        self.device = device
        self.transformation = transformation.to(self.device)
        self.target_sample_rate = target_sample_rate
        self.num_samples = num_samples
        
        if folder_filters is not None:
            print(f"Applying folder filters: {', '.join(folder_filters)}")
            initial_count = len(self.annotations)
            self.annotations = self.annotations[self.annotations['filename'].str.contains('|'.join(folder_filters))]
            filtered_count = len(self.annotations)
            if filtered_count == 0:
                raise ValueError(f"No files found in folders: {', '.join(folder_filters)}")
            print(f"Filtered dataset from {initial_count} to {filtered_count} samples.")

        self.annotations.reset_index(drop=True, inplace=True)
        self.label_to_index = {label: idx for idx, label in enumerate(self.annotations['primary_label'].unique())}

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, index):
        audio_sample_path = self._get_audio_sample_path(index)
        label = self._get_audio_sample_label(index)
        signal, sr = torchaudio.load(audio_sample_path)
        signal = signal.to(self.device)
        signal = self._resample_if_necessary(signal, sr)
        signal = self._mix_down_if_necessary(signal)
        signal = self._cut_if_necessary(signal)
        signal = self._right_pad_if_necessary(signal)
        signal = self.transformation(signal)
        return signal, label

    def _cut_if_necessary(self, signal):
        if signal.shape[1] > self.num_samples:
            signal = signal[:, :self.num_samples]
        return signal

    def _right_pad_if_necessary(self, signal):
        length_signal = signal.shape[1]
        if length_signal < self.num_samples:
            num_missing_samples = self.num_samples - length_signal
            last_dim_padding = (0, num_missing_samples)
            signal = torch.nn.functional.pad(signal, last_dim_padding)
        return signal

    def _resample_if_necessary(self, signal, sr):
        if sr != self.target_sample_rate:
            resampler = torchaudio.transforms.Resample(sr, self.target_sample_rate).to(self.device)
            signal = resampler(signal)
        return signal

    def _mix_down_if_necessary(self, signal):
        if signal.shape[0] > 1:
            signal = torch.mean(signal, dim=0, keepdim=True)
        return signal

    def _get_audio_sample_path(self, index):
        filename = self.annotations.iloc[index]['filename']
        path = os.path.join(self.audio_dir, filename)
        return path

    def _get_audio_sample_label(self, index):
        label = self.annotations.iloc[index]['primary_label']
        label_index = self.label_to_index[label]
        return label_index

mel_spectrogram = torchaudio.transforms.MelSpectrogram(
    sample_rate=SAMPLE_RATE,
    n_mels=64,
    n_fft=1024,
    hop_length=512
)

# Charger toutes les annotations et filtrer si nécessaire
dataset = BirdClassDataset(ANNOTATIONS_FILE, AUDIO_DIR, mel_spectrogram, SAMPLE_RATE, NUM_SAMPLES, device, folder_filters=FOLDER_FILTERS)

# Diviser le dataset en ensemble d'entraînement et de validation
train_size = int(0.8 * len(dataset))
val_size = len(dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset, [train_size, val_size])

train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)

class BirdCNN(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3, stride=1, padding=1)
        self.relu = nn.ReLU()
        self.maxpool = nn.MaxPool2d(kernel_size=2)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(64 * 16 * 8, 128)  # Corrected dimensions here
        self.fc2 = nn.Linear(128, num_classes)

    def forward(self, x):
        x = self.relu(self.conv1(x))
        x = self.maxpool(x)
        x = self.relu(self.conv2(x))
        x = self.maxpool(x)
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.fc2(x)
        return x
# Instancier le modèle
num_classes = len(dataset.label_to_index)
model = BirdCNN(num_classes=num_classes).to(device)
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)

# Fonction de formation
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        X, y = X.to(device), y.to(device)
        
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 10 == 0:
            loss, current = loss.item(), batch * len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

# Fonction de test
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0

    with torch.no_grad():
        for X, y in dataloader:
            X, y = X.to(device), y.to(device)
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
    return correct

# Entraîner le modèle
for epoch in range(EPOCHS):
    print(f"Epoch {epoch+1}\n-------------------------------")
    train(train_loader, model, loss_fn, optimizer)
    accuracy = test(val_loader, model, loss_fn)
    print(f"Validation Accuracy: {accuracy:.2f}")

# Sauvegarder le modèle entraîné
torch.save(model.state_dict(), 'birdclef_cnn_model.pth')

Applying folder filters: abethr1, abhori1, abythr1, afbfly1, afdfly1
Filtered dataset from 16941 to 218 samples.
Epoch 1
-------------------------------
loss: 4.805730  [    0/  174]
Test Error: 
 Accuracy: 52.3%, Avg loss: 6.981363 

Validation Accuracy: 0.52
Epoch 2
-------------------------------
loss: 0.947601  [    0/  174]
Test Error: 
 Accuracy: 52.3%, Avg loss: 5.841698 

Validation Accuracy: 0.52
Epoch 3
-------------------------------
loss: 0.980016  [    0/  174]
Test Error: 
 Accuracy: 54.5%, Avg loss: 7.683813 

Validation Accuracy: 0.55
Epoch 4
-------------------------------
loss: 0.774240  [    0/  174]
Test Error: 
 Accuracy: 52.3%, Avg loss: 9.282438 

Validation Accuracy: 0.52
Epoch 5
-------------------------------
loss: 0.571560  [    0/  174]
Test Error: 
 Accuracy: 54.5%, Avg loss: 9.220858 

Validation Accuracy: 0.55
Epoch 6
-------------------------------
loss: 0.811113  [    0/  174]
Test Error: 
 Accuracy: 50.0%, Avg loss: 9.448606 

Validation Accuracy: 0.50

In [32]:
# Charger les poids du modèle entraîné
model = BirdCNN(num_classes=num_classes).to(device)
model.load_state_dict(torch.load('birdclef_cnn_model.pth'))

# Définir une fonction pour prédire les étiquettes sur de nouvelles données
def predict_new_data(model, data_loader):
    predictions = []
    model.eval()
    with torch.no_grad():
        for X, _ in data_loader:
            X = X.to(device)
            outputs = model(X)
            _, predicted = torch.max(outputs, 1)
            predictions.extend(predicted.tolist())
    return predictions

# Charger de nouvelles données pour la prédiction
# Assurez-vous de les prétraiter de la même manière que les données d'entraînement et de validation
# Créez un DataLoader pour les nouvelles données, comme vous l'avez fait pour les données d'entraînement et de validation

# Prédire les étiquettes sur les nouvelles données
new_data_predictions = predict_new_data(model, new_data_loader)

# Afficher les prédictions
print(new_data_predictions)


NameError: name 'new_data_loader' is not defined