### Split the data into train set and test set

In [26]:
from src import DAPSAudioDataset_with_cropping
import os
import librosa
from torch.utils.data import DataLoader, Subset

import torch

audio_dir = "daps"

# Speaker prefixes labelled as class 1; every other prefix is labelled class 0.
class_1_speakers = ["f1", "f7", "f8", "m3", "m6", "m8"]

# Maps every file path found under the spectrogram directory to its 0/1 label.
class_mapping = {}

for dirpath, _subdirs, names in os.walk('precomputed_spectrograms_aug'):
    for name in names:
        audio_path = os.path.join(dirpath, name)
        try:
            # The speaker prefix is the part of the file name before the first "_".
            class_mapping[audio_path] = 1 if name.split("_")[0] in class_1_speakers else 0
        except Exception as e:
            print(f"Error processing {audio_path}: {e}")

print("Class mapping created:")

# Tally how many files received each label.
assigned_labels = list(class_mapping.values())
class_0_count = assigned_labels.count(0)
class_1_count = assigned_labels.count(1)
print(f"Total Class 0 samples: {class_0_count}")
print(f"Total Class 1 samples: {class_1_count}")
print("Preparing the dataset...")

full_dataset = DAPSAudioDataset_with_cropping(class_mapping=class_mapping)


def _split_key(path):
    """Return the grouping key that decides whether a sample goes to test or train.

    The file name (extension removed) is split on underscores. For names with
    three to five fields the key is the first field joined with every field
    from the third onwards; the second field is left out. Any other field
    count yields the empty string.
    """
    parts = os.path.splitext(os.path.basename(path))[0].split('_')
    if 3 <= len(parts) <= 5:
        return "_".join([parts[0], *parts[2:]])
    return ""


test_ids = []
train_ids = []
filenames = set()  # keys already represented in the test split
for idx in range(len(full_dataset)):
    path, label = full_dataset.class_mapping[idx]
    if path is None:
        continue
    place = _split_key(path)
    # The first sample seen for a key goes to the test split, later ones to train.
    # NOTE(review): which sample is "first" depends on the dataset's index order — confirm it is stable.
    if place in filenames:
        train_ids.append(idx)
    else:
        filenames.add(place)
        test_ids.append(idx)
print(len(test_ids))
print(len(train_ids))

print("Dataset created")

train_dataset = Subset(full_dataset, train_ids)
test_dataset = Subset(full_dataset, test_ids)

def collate_fn(batch):
    """Collate a batch after discarding the samples that are ``None``.

    Args:
        batch: list of samples handed over by the DataLoader; entries may be ``None``.

    Returns:
        ``None`` when no sample is left after filtering, otherwise the result of
        PyTorch's default collate function applied to the remaining samples.
    """
    kept = list(filter(lambda sample: sample is not None, batch))
    if not kept:
        return None
    return torch.utils.data.dataloader.default_collate(kept)


train_loader = DataLoader(
    train_dataset, batch_size=64, shuffle=True, collate_fn=collate_fn
)

# Evaluation gains nothing from shuffling; a fixed order keeps runs comparable.
test_loader = DataLoader(
    test_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn
)


def _preview_first_batch(loader, split_name):
    """Print the input shape and the labels of the first usable batch of ``loader``.

    Batches that ``collate_fn`` turned into ``None`` (every sample dropped) are
    skipped instead of failing on unpacking. Nothing is printed for a loader
    that yields no usable batch.
    """
    for batch in loader:
        if batch is None:
            continue
        inputs, labels = batch
        print(f"{split_name} Inputs shape: {inputs.shape}")
        print(f"{split_name} Labels: {labels}")
        break


_preview_first_batch(train_loader, "Train")
_preview_first_batch(test_loader, "Test")

Class mapping created:
Total Class 0 samples: 1050
Total Class 1 samples: 1350
Preparing the dataset...
480
1920
Dataset created


  spectrogram_tensor = torch.load(spectrogram_path)


Train Inputs shape: torch.Size([64, 1, 64, 64])
Train Labels: tensor([1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1,
        1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1, 1,
        1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0])
Test Inputs shape: torch.Size([16, 1, 64, 64])
Test Labels: tensor([0, 0, 1, 1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1])


In [27]:
# Number of batches the training DataLoader yields per epoch.
print(len(train_loader))

30


In [3]:
# Number of batches the test DataLoader yields.
len(test_loader)

30

### The model


In [21]:
from src import Net 

In [22]:
import torch



# Spectrogram dimensions handed to the network constructor.
freq_bins, time_steps = 64, 64

model = Net(freq_bins, time_steps)
print(model)

# Smoke test: push one random input of shape (1, 1, 64, 64) through the
# network and report the shape of the result.
test_input = torch.randn((1, 1, freq_bins, time_steps))
output = model(test_input)
print(output.shape)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=2704, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)
torch.Size([1, 10])


### Train the model

In [23]:
### Reduce the training set to a few samples for faster experimentation.
### Only for quick tests: training on this subset reduces the model's quality.
from torch.utils.data import DataLoader, Subset

# Number of leading training samples kept for the quick run.
DEBUG_SUBSET_SIZE = 20

# Clamp to the dataset size so the index list never points past the last sample.
subset_dataset = Subset(
    train_dataset, list(range(min(DEBUG_SUBSET_SIZE, len(train_dataset))))
)

# NOTE: this rebinds `train_loader`; re-run the data-split cell to train on the
# full training set again.
train_loader = DataLoader(subset_dataset, batch_size=1, shuffle=False)

In [7]:
# Number of batches in the current `train_loader`.
# NOTE(review): the 20-sample, batch_size=1 loader built in the previous cell has
# 20 batches; any other value means this cell ran against a different
# `train_loader` — confirm the execution order.
len(train_loader)

120

In [16]:
# Cap the number of CPU threads PyTorch uses (original intent: keep the
# computer from crashing during training).
import torch

NUM_TORCH_THREADS = 4
torch.set_num_threads(NUM_TORCH_THREADS)

In [17]:
import multiprocessing

import torch

# multiprocessing.cpu_count() reports the number of logical CPUs of the machine,
# not the number of physical cores, so the label says "logiques".
cpu_logical_count = multiprocessing.cpu_count()
print("Nombre de cœurs logiques:", cpu_logical_count)

# torch.get_num_threads() is the number of threads PyTorch uses to parallelise
# CPU operations (the value set through torch.set_num_threads), not a core count.
torch_threads = torch.get_num_threads()
logical_cores = torch_threads  # historical name, kept so later cells keep working
print("Nombre de threads utilisés par PyTorch:", torch_threads)


Nombre de cœurs physiques: 8
Nombre de cœurs logiques (threads disponibles): 4


In [32]:
import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from src import Net

# Train on the GPU when one is available, otherwise on the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
freq_bins = 64
time_steps = 64


model = Net(freq_bins, time_steps).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)

num_epochs = 50

for epoch in range(num_epochs):
    print(f"Epoch {epoch + 1}/{num_epochs}, start training...")
    model.train()
    running_loss = 0.0
    batches_seen = 0  # batches that actually contributed to the loss

    for batch in train_loader:
        # A loader built with `collate_fn` yields None when every sample of a
        # batch was dropped; skip it instead of failing on unpacking.
        if batch is None:
            continue
        inputs, labels = batch
        inputs, labels = inputs.to(device), labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)

        loss = criterion(outputs, labels)

        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        batches_seen += 1

    # Average over the batches that were processed; max() avoids a division by
    # zero when the loader produced no usable batch.
    avg_loss = running_loss / max(batches_seen, 1)
    print(f"Epoch [{epoch + 1}/{num_epochs}], Loss: {avg_loss:.4f}")

# Persist only the learned weights; the file name encodes the input size.
torch.save(model.state_dict(), f"simple_cnn_model_reduced_first_layer{freq_bins}x{time_steps}.pth")
print("Model saved successfully.")

Epoch 1/50, start training...
Epoch [1/50], Loss: 1.1642
Epoch 2/50, start training...
Epoch [2/50], Loss: 0.6908
Epoch 3/50, start training...
Epoch [3/50], Loss: 0.6834
Epoch 4/50, start training...
Epoch [4/50], Loss: 0.6843
Epoch 5/50, start training...
Epoch [5/50], Loss: 0.6816
Epoch 6/50, start training...
Epoch [6/50], Loss: 0.6635
Epoch 7/50, start training...
Epoch [7/50], Loss: 0.6109
Epoch 8/50, start training...
Epoch [8/50], Loss: 0.5317
Epoch 9/50, start training...
Epoch [9/50], Loss: 0.4996
Epoch 10/50, start training...
Epoch [10/50], Loss: 0.4458
Epoch 11/50, start training...
Epoch [11/50], Loss: 0.4135
Epoch 12/50, start training...
Epoch [12/50], Loss: 0.3856
Epoch 13/50, start training...
Epoch [13/50], Loss: 0.3914
Epoch 14/50, start training...
Epoch [14/50], Loss: 0.3943
Epoch 15/50, start training...
Epoch [15/50], Loss: 0.3805
Epoch 16/50, start training...
Epoch [16/50], Loss: 0.3664
Epoch 17/50, start training...
Epoch [17/50], Loss: 0.3539
Epoch 18/50, st

### Evaluate the model

In [33]:
import torch
from sklearn.metrics import f1_score, precision_score, recall_score, accuracy_score

from src import Net
freq_bins = 64
time_steps = 64

# Defined here so this cell does not depend on the training cell having run.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

model = Net(freq_bins, time_steps)
# map_location lets a checkpoint saved on one device load on another;
# weights_only=True restricts unpickling to tensors and plain containers.
state_dict = torch.load(
    f"simple_cnn_model_reduced_first_layer{freq_bins}x{time_steps}.pth",
    map_location=device,
    weights_only=True,
)
model.load_state_dict(state_dict)
# The inputs are moved to `device` below, so the model has to live there too.
model.to(device)
print("the model is loaded")
model.eval()

all_labels = []
all_predictions = []

occurence = 0
with torch.no_grad():
    length = len(test_loader)
    for batch in test_loader:
        print(f"start testing occurence {occurence + 1}/{length}")
        occurence += 1
        # `collate_fn` yields None when every sample of a batch was dropped.
        if batch is None:
            continue
        inputs, labels = batch
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        # Predicted class = index of the largest output per sample.
        _, predicted = torch.max(outputs, 1)

        all_labels.extend(labels.cpu().numpy())
        all_predictions.extend(predicted.cpu().numpy())

# NOTE(review): the printed architecture shows 10 output units; if the model ever
# predicts a class other than 0 or 1, average="binary" raises — confirm the
# output layer size in `Net`.
f1 = f1_score(all_labels, all_predictions, average="binary")
precision = precision_score(all_labels, all_predictions, average="binary")
recall = recall_score(all_labels, all_predictions, average="binary")
accuracy = accuracy_score(all_labels, all_predictions)

# Print metrics
print(f"F1-Score: {f1:.4f}")
print(f"Precision: {precision:.4f}")
print(f"Recall: {recall:.4f}")
print(f"Accuracy: {accuracy:.4f}")

  model.load_state_dict(torch.load(f"simple_cnn_model_reduced_first_layer{freq_bins}x{time_steps}.pth"))


the model is loaded
start testing occurence 1/30
start testing occurence 2/30
start testing occurence 3/30
start testing occurence 4/30
start testing occurence 5/30
start testing occurence 6/30
start testing occurence 7/30
start testing occurence 8/30
start testing occurence 9/30
start testing occurence 10/30
start testing occurence 11/30
start testing occurence 12/30
start testing occurence 13/30
start testing occurence 14/30
start testing occurence 15/30
start testing occurence 16/30
start testing occurence 17/30
start testing occurence 18/30
start testing occurence 19/30
start testing occurence 20/30
start testing occurence 21/30
start testing occurence 22/30
start testing occurence 23/30
start testing occurence 24/30
start testing occurence 25/30
start testing occurence 26/30
start testing occurence 27/30
start testing occurence 28/30
start testing occurence 29/30
start testing occurence 30/30
F1-Score: 0.8494
Precision: 0.9760
Recall: 0.7519
Accuracy: 0.8500
