### Split the data into train set and test set

In [12]:
import os
from torch.utils.data import DataLoader, random_split
from dap_datasets import DAPSAudioDataset
from spectrograms import generate_spectrograms
import librosa
import numpy as np 
import torch

audio_dir = "daps"
spectrogram_dir = "spectrograms"
target_folders = ["ipad_balcony1"]

class_1_speakers = ["f1", "f7", "f8", "m3", "m6", "m8"]

class_mapping = {}

for root, dirs, files in os.walk(audio_dir):
    for file in files:
        # Skip files that start with "._" or are not ".wav" files
        if file.startswith("._") or not file.endswith(".wav"):
            continue

        speaker_prefix = file.split("_")[0]
        if speaker_prefix in class_1_speakers:
            class_mapping[os.path.join(root, file)] = 1
        else:
            class_mapping[os.path.join(root, file)] = 0

print("Class mapping created:")
print(class_mapping)

# Print class distribution
class_0_count = sum(1 for label in class_mapping.values() if label == 0)
class_1_count = sum(1 for label in class_mapping.values() if label == 1)

print(f"Total Class 0 samples: {class_0_count}")
print(f"Total Class 1 samples: {class_1_count}")


import torch
import librosa
import numpy as np


def preprocess_audio(audio_path, max_length=16000):
    try:
        audio, sr = librosa.load(audio_path, sr=None)
        if audio is None or len(audio) == 0:
            return None

        mel_spectrogram = librosa.feature.melspectrogram(
            y=audio, sr=sr, n_mels=128, fmax=8000
        )
        mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
        normalized_spectrogram = (
            mel_spectrogram_db - np.mean(mel_spectrogram_db)
        ) / np.std(mel_spectrogram_db)

        # Ajustement de la taille du spectrogramme
        target_length = max_length
        if normalized_spectrogram.shape[1] > target_length:
            normalized_spectrogram = normalized_spectrogram[:, :target_length]
        else:
            padding = target_length - normalized_spectrogram.shape[1]
            normalized_spectrogram = np.pad(
                normalized_spectrogram, ((0, 0), (0, padding)), mode="constant"
            )

        spectrogram_tensor = torch.tensor(
            normalized_spectrogram, dtype=torch.float32
        ).unsqueeze(0)
        return spectrogram_tensor
    except Exception as e:
        print(f"Error processing {audio_path}: {e}")
        return None


class DAPSAudioDataset:
    def __init__(self, class_mapping, transform=None):
        self.class_mapping = list(class_mapping.items())
        self.transform = transform

    def __len__(self):
        return len(self.class_mapping)

    def __getitem__(self, idx):
        audio_path, label = self.class_mapping[idx]
        processed_audio = self.transform(audio_path) if self.transform else None

        if processed_audio is None:
            return None  # Retourne None si le prétraitement a échoué

        return processed_audio, label


print("preparation of the dataset..")
full_dataset = DAPSAudioDataset(class_mapping=class_mapping, transform=preprocess_audio)
print("Dataset created")


# Define the train-test split ratio
train_ratio = 0.8
train_size = int(train_ratio * len(full_dataset))
test_size = len(full_dataset) - train_size


# Split the dataset into training and testing sets
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])
def collate_fn(batch):
    batch = [item for item in batch if item is not None]
    if len(batch) == 0:
        return None
    return torch.utils.data.dataloader.default_collate(batch)


train_loader = DataLoader(
    train_dataset, batch_size=16, shuffle=True, collate_fn=collate_fn
)
test_loader = DataLoader(
    test_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn
)


# Print the shape of the first batch in the training set
for batch in train_loader:
    inputs, labels = batch
    print(f"Train Inputs shape: {inputs.shape}")
    print(f"Train Labels: {labels}")
    break

# Print the shape of the first batch in the testing set
for batch in test_loader:
    inputs, labels = batch
    print(f"Test Inputs shape: {inputs.shape}")
    print(f"Test Labels: {labels}")
    break

Class mapping created:
{'daps\\daps\\clean\\f10_script1_clean.wav': 0, 'daps\\daps\\clean\\f10_script2_clean.wav': 0, 'daps\\daps\\clean\\f10_script3_clean.wav': 0, 'daps\\daps\\clean\\f10_script4_clean.wav': 0, 'daps\\daps\\clean\\f10_script5_clean.wav': 0, 'daps\\daps\\clean\\f1_script1_clean.wav': 1, 'daps\\daps\\clean\\f1_script2_clean.wav': 1, 'daps\\daps\\clean\\f1_script3_clean.wav': 1, 'daps\\daps\\clean\\f1_script4_clean.wav': 1, 'daps\\daps\\clean\\f1_script5_clean.wav': 1, 'daps\\daps\\clean\\f2_script1_clean.wav': 0, 'daps\\daps\\clean\\f2_script2_clean.wav': 0, 'daps\\daps\\clean\\f2_script3_clean.wav': 0, 'daps\\daps\\clean\\f2_script4_clean.wav': 0, 'daps\\daps\\clean\\f2_script5_clean.wav': 0, 'daps\\daps\\clean\\f3_script1_clean.wav': 0, 'daps\\daps\\clean\\f3_script2_clean.wav': 0, 'daps\\daps\\clean\\f3_script3_clean.wav': 0, 'daps\\daps\\clean\\f3_script4_clean.wav': 0, 'daps\\daps\\clean\\f3_script5_clean.wav': 0, 'daps\\daps\\clean\\f4_script1_clean.wav': 0, 'daps

In [13]:
len(train_loader)

76

In [14]:
len(test_loader)

19

### The model


In [6]:
import torch
import torch.nn as nn
import torch.optim as optim


class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2, padding=0)
        # Ajustement de la taille de fc1
        self.fc1 = nn.Linear(32 * 32 * 4000, 128)
        self.fc2 = nn.Linear(128, 2)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(self.relu(self.conv1(x)))
        x = self.pool(self.relu(self.conv2(x)))
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        x = self.dropout(x)
        x = self.fc2(x)
        return x

### Train the model

In [15]:
### reduce the train set to few samples for faster training

from torch.utils.data import DataLoader, Subset


subset_dataset = Subset(train_dataset, list(range(20)))

train_loader = DataLoader(subset_dataset, batch_size=1, shuffle=False)

In [16]:
len(train_loader)

20

In [17]:
# AVOID YOUR COMPUTER TO CRASH
import torch

torch.set_num_threads(4) 

In [18]:
import multiprocessing


print("Nombre de cœurs physiques:", multiprocessing.cpu_count())


logical_cores = torch.get_num_threads()
print("Nombre de cœurs logiques (threads disponibles):", logical_cores)


Nombre de cœurs physiques: 8
Nombre de cœurs logiques (threads disponibles): 4


In [19]:
# Set device (use GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model, loss function, and optimizer
model = SimpleCNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    print(f"Epoch {epoch+1}/{num_epochs}, start training...")
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"Epoch [{epoch+1}/{num_epochs}], Loss: {running_loss/len(train_loader):.4f}")

# Save the model if needed
torch.save(model.state_dict(), "simple_cnn_model.pth")

Epoch 1/10, start training...
Epoch [1/10], Loss: 151.3322
Epoch 2/10, start training...
Epoch [2/10], Loss: 30.6230
Epoch 3/10, start training...
Epoch [3/10], Loss: 0.2650
Epoch 4/10, start training...
Epoch [4/10], Loss: 0.0358
Epoch 5/10, start training...
Epoch [5/10], Loss: 0.0027
Epoch 6/10, start training...
Epoch [6/10], Loss: 0.0041
Epoch 7/10, start training...
Epoch [7/10], Loss: 0.0001
Epoch 8/10, start training...
Epoch [8/10], Loss: 0.0003
Epoch 9/10, start training...
Epoch [9/10], Loss: 0.0001
Epoch 10/10, start training...
Epoch [10/10], Loss: 0.0000


### Evaluate Model

In [20]:
import torch
from sklearn.metrics import f1_score

model = SimpleCNN()

model.load_state_dict(torch.load("simple_cnn_model.pth"))
print("the model is loaded")
model.eval()

all_labels = []
all_predictions = []

occurence = 0
with torch.no_grad():
    length = len(test_loader)
    for inputs, labels in test_loader:
        print(f"start testing occurence {occurence}/{length}")
        occurence += 1
        inputs, labels = inputs.to(device), labels.to(device)

        
        outputs = model(inputs)
        _, predicted = torch.max(
            outputs, 1
        )  

        
        all_labels.extend(labels.cpu().numpy())  
        all_predictions.extend(predicted.cpu().numpy())


f1 = f1_score(
    all_labels, all_predictions, average="binary"
) 
print(f"F1-Score: {f1:.4f}")

  model.load_state_dict(torch.load("simple_cnn_model.pth"))


the model is loaded
start testing occurence 0/19
start testing occurence 1/19
start testing occurence 2/19
start testing occurence 3/19
start testing occurence 4/19
start testing occurence 5/19
start testing occurence 6/19
start testing occurence 7/19
start testing occurence 8/19
start testing occurence 9/19
start testing occurence 10/19
start testing occurence 11/19
start testing occurence 12/19
start testing occurence 13/19
start testing occurence 14/19
start testing occurence 15/19
start testing occurence 16/19
start testing occurence 17/19
start testing occurence 18/19
F1-Score: 0.1647
