### Split the data into train set and test set

In [27]:
import os
import numpy as np
import librosa
import torch
from torch.utils.data import DataLoader, random_split
from dap_datasets import DAPSAudioDataset
from spectrograms import generate_spectrograms

# Directories
audio_dir = "daps"
spectrogram_dir = "spectrograms"

# Class 1 speakers
class_1_speakers = ["f1", "f7", "f8", "m3", "m6", "m8"]

# Class mapping
class_mapping = {}

# Load audio files and create class mapping
for root, dirs, files in os.walk(audio_dir):
    for file in files:
        if file.startswith("._") or not file.endswith(".wav"):
            continue

        audio_path = os.path.join(root, file)
        try:
            audio, sr = librosa.load(audio_path, sr=None)
            if isinstance(audio, np.ndarray) and len(audio) > 0:  # Ensure valid audio data
                speaker_prefix = file.split("_")[0]
                if speaker_prefix in class_1_speakers:
                    class_mapping[audio_path] = 1
                else:
                    class_mapping[audio_path] = 0
            else:
                print(f"Warning: Audio data is invalid or empty for {audio_path}")
        except Exception as e:
            print(f"Error processing {audio_path}: {e}")

print("Class mapping created:")
print(class_mapping)

# Print class distribution
class_0_count = sum(1 for label in class_mapping.values() if label == 0)
class_1_count = sum(1 for label in class_mapping.values() if label == 1)
print(f"Total Class 0 samples: {class_0_count}")
print(f"Total Class 1 samples: {class_1_count}")

# Audio preprocessing function
def preprocess_audio(audio_path, max_length=16000):
    try:
        audio, sr = librosa.load(audio_path, sr=None)
        if audio is None or len(audio) == 0:
            print(f"Warning: Audio is None or empty for {audio_path}")
            return None

        mel_spectrogram = librosa.feature.melspectrogram(y=audio, sr=sr, n_mels=128, fmax=8000)
        mel_spectrogram_db = librosa.power_to_db(mel_spectrogram, ref=np.max)
        normalized_spectrogram = (mel_spectrogram_db - np.mean(mel_spectrogram_db)) / np.std(mel_spectrogram_db)

        # Resize the spectrogram
        target_length = max_length
        if normalized_spectrogram.shape[1] > target_length:
            normalized_spectrogram = normalized_spectrogram[:, :target_length]
        else:
            padding = target_length - normalized_spectrogram.shape[1]
            normalized_spectrogram = np.pad(normalized_spectrogram, ((0, 0), (0, padding)), mode="constant")

        spectrogram_tensor = torch.tensor(normalized_spectrogram, dtype=torch.float32).unsqueeze(0)
        return spectrogram_tensor
    except Exception as e:
        print(f"Error processing {audio_path}: {e}")
        return None

# Dataset class
class DAPSAudioDataset:
    def __init__(self, class_mapping, transform=None):
        self.class_mapping = list(class_mapping.items())
        self.transform = transform

    def __len__(self):
        return len(self.class_mapping)

    def __getitem__(self, idx):
        audio_path, label = self.class_mapping[idx]
        processed_audio = self.transform(audio_path) if self.transform else None

        if processed_audio is None:
            return None  # Return None if preprocessing failed

        return processed_audio, label

# Prepare the dataset
print("Preparing the dataset...")
full_dataset = DAPSAudioDataset(class_mapping=class_mapping, transform=preprocess_audio)
print("Dataset created")

# Train-test split
train_ratio = 0.8
train_size = int(train_ratio * len(full_dataset))
test_size = len(full_dataset) - train_size

# Split the dataset into training and testing sets
train_dataset, test_dataset = random_split(full_dataset, [train_size, test_size])

def collate_fn(batch):
    batch = [item for item in batch if item is not None]
    if len(batch) == 0:
        return None
    return torch.utils.data.dataloader.default_collate(batch)

train_loader = DataLoader(
    train_dataset, batch_size=16, shuffle=True, collate_fn=collate_fn
)
test_loader = DataLoader(
    test_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn
)

# Print the shape of the first batch in the training set
for batch in train_loader:
    if batch is not None:
        inputs, labels = batch
        print(f"Train Inputs shape: {inputs.shape}")
        print(f"Train Labels: {labels}")
    break

# Print the shape of the first batch in the testing set
for batch in test_loader:
    if batch is not None:
        inputs, labels = batch
        print(f"Test Inputs shape: {inputs.shape}")
        print(f"Test Labels: {labels}")
    break

Class mapping created:
{'daps\\daps\\clean\\f10_script1_clean.wav': 0, 'daps\\daps\\clean\\f10_script2_clean.wav': 0, 'daps\\daps\\clean\\f10_script3_clean.wav': 0, 'daps\\daps\\clean\\f10_script4_clean.wav': 0, 'daps\\daps\\clean\\f10_script5_clean.wav': 0, 'daps\\daps\\clean\\f1_script1_clean.wav': 1, 'daps\\daps\\clean\\f1_script2_clean.wav': 1, 'daps\\daps\\clean\\f1_script3_clean.wav': 1, 'daps\\daps\\clean\\f1_script4_clean.wav': 1, 'daps\\daps\\clean\\f1_script5_clean.wav': 1, 'daps\\daps\\clean\\f2_script1_clean.wav': 0, 'daps\\daps\\clean\\f2_script2_clean.wav': 0, 'daps\\daps\\clean\\f2_script3_clean.wav': 0, 'daps\\daps\\clean\\f2_script4_clean.wav': 0, 'daps\\daps\\clean\\f2_script5_clean.wav': 0, 'daps\\daps\\clean\\f3_script1_clean.wav': 0, 'daps\\daps\\clean\\f3_script2_clean.wav': 0, 'daps\\daps\\clean\\f3_script3_clean.wav': 0, 'daps\\daps\\clean\\f3_script4_clean.wav': 0, 'daps\\daps\\clean\\f3_script5_clean.wav': 0, 'daps\\daps\\clean\\f4_script1_clean.wav': 0, 'daps

In [22]:
len(train_loader)

76

In [23]:
import sys
size = sys.getsizeof(train_loader)

print("size", size)

size 48


In [28]:
len(train_loader)

import psutil

def memory_usage_in_gb():
    process= psutil.Process(os.getpid())
    memory_usage_in_gb = process.memory_info().rss / ( 1024**3)
    return memory_usage_in_gb

total_batches = 0
for data, target in train_loader :
    total_batches += 1 
    print(f"Memory usage after batch { total_batches} : {memory_usage_in_gb():.4f} GB")

print(f"Memory usage after batch after loading data : { memory_usage_in_gb():4f} GB")

one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
Memory usage after batch 1 : 0.8524 GB
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
Memory usage after batch 2 : 0.8524 GB
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectro is computed
one spectr

KeyboardInterrupt: 

In [24]:
import sys
a= 1234

print(sys.getsizeof(a))


28


In [4]:
len(test_loader)

19

### The model


In [15]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)  # Change input channels to 1 for grayscale images (spectrograms)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        
        # Calculate the input size to the first fully connected layer
        self.fc1_input_size = self._get_fc1_input_size()
        self.fc1 = nn.Linear(self.fc1_input_size, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)  # Adjust output size as needed

    def _get_fc1_input_size(self):
        # Create a dummy input tensor to calculate the output size after conv layers
        with torch.no_grad():
            x = torch.zeros(1, 1, 128, 16000)  # Assuming input spectrogram shape (1, 128, 16000)
            x = self.pool(F.relu(self.conv1(x)))
            x = self.pool(F.relu(self.conv2(x)))
            return x.numel()  # Total number of elements

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)  # flatten all dimensions except batch
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x



### Train the model

In [16]:
### reduce the train set to few samples for faster training

from torch.utils.data import DataLoader, Subset


subset_dataset = Subset(train_dataset, list(range(20)))

train_loader = DataLoader(subset_dataset, batch_size=1, shuffle=False)

In [9]:
len(train_loader)

20

In [10]:
# AVOID YOUR COMPUTER TO CRASH
import torch

torch.set_num_threads(4) 

In [11]:
import multiprocessing


print("Nombre de cœurs physiques:", multiprocessing.cpu_count())


logical_cores = torch.get_num_threads()
print("Nombre de cœurs logiques (threads disponibles):", logical_cores)


Nombre de cœurs physiques: 8
Nombre de cœurs logiques (threads disponibles): 4


In [17]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader

# Set device (use GPU if available)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Initialize the model, loss function, and optimizer
model = Net().to(device)  # Move the model to the specified device
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Using Adam optimizer

# Training loop
num_epochs = 10

for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}, start training...")
    model.train()  # Set the model to training mode
    running_loss = 0.0

    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)  # Move inputs and labels to the device

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(inputs)

        # Calculate loss
        loss = criterion(outputs, labels)

        # Backward pass and optimization
        loss.backward()
        optimizer.step()

        running_loss += loss.item()  # Accumulate loss

    # Average loss for the epoch
    avg_loss = running_loss / len(train_loader)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

# Save the model if needed
torch.save(model.state_dict(), "simple_cnn_model.pth")
print("Model saved successfully.")


Epoch 1/10, start training...
Epoch [1/10], Loss: 50.4622
Epoch 2/10, start training...
Epoch [2/10], Loss: 0.8855
Epoch 3/10, start training...
Epoch [3/10], Loss: 0.5245
Epoch 4/10, start training...
Epoch [4/10], Loss: 0.0963
Epoch 5/10, start training...
Epoch [5/10], Loss: 0.0247
Epoch 6/10, start training...
Epoch [6/10], Loss: 0.0011
Epoch 7/10, start training...
Epoch [7/10], Loss: 0.0006
Epoch 8/10, start training...
Epoch [8/10], Loss: 0.0005
Epoch 9/10, start training...
Epoch [9/10], Loss: 0.0004
Epoch 10/10, start training...
Epoch [10/10], Loss: 0.0003
Model saved successfully.


### Evaluate Model

In [18]:
import torch
from sklearn.metrics import f1_score

model = Net()

model.load_state_dict(torch.load("simple_cnn_model.pth"))
print("the model is loaded")
model.eval()

all_labels = []
all_predictions = []

occurence = 0
with torch.no_grad():
    length = len(test_loader)
    for inputs, labels in test_loader:
        print(f"start testing occurence {occurence}/{length}")
        occurence += 1
        inputs, labels = inputs.to(device), labels.to(device)

        
        outputs = model(inputs)
        _, predicted = torch.max(
            outputs, 1
        )  

        
        all_labels.extend(labels.cpu().numpy())  
        all_predictions.extend(predicted.cpu().numpy())


f1 = f1_score(
    all_labels, all_predictions, average="binary"
) 
print(f"F1-Score: {f1:.4f}")

  model.load_state_dict(torch.load("simple_cnn_model.pth"))


the model is loaded
start testing occurence 0/19
start testing occurence 1/19
start testing occurence 2/19
start testing occurence 3/19
start testing occurence 4/19
start testing occurence 5/19
start testing occurence 6/19
start testing occurence 7/19
start testing occurence 8/19
start testing occurence 9/19
start testing occurence 10/19
start testing occurence 11/19
start testing occurence 12/19
start testing occurence 13/19
start testing occurence 14/19
start testing occurence 15/19
start testing occurence 16/19
start testing occurence 17/19
start testing occurence 18/19
F1-Score: 0.2569
